# Hate Speech Detection Training Notebook\nThis notebook trains a DistilBERT classifier to detect hate speech, offensive language, and neither.\nIt includes preprocessing, training, evaluation, and inference demo.

In [None]:
!pip install transformers datasets torch scikit-learn pandas

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Load Dataset

In [3]:
# Load hate speech detection dataset
df = pd.read_csv("/content/drive/MyDrive/Yash/labeled_data.csv")

# Keep only necessary columns (tweet text and class)
df = df[["tweet", "class"]].dropna()
df = df.rename(columns={"tweet": "text"})

# Use class labels directly (0=hate_speech, 1=offensive_language, 2=neither)
df["label"] = df["class"]

# Simple preprocessing (strip, normalize spaces)
df["text"] = df["text"].astype(str).str.replace(r'\s+', ' ', regex=True).str.strip()

print(f"Dataset shape: {df.shape}")
print(f"Class distribution:\n{df['label'].value_counts()}")
df.head()

Dataset shape: (24783, 3)
Class distribution:
label
1    19190
2     4163
0     1430
Name: count, dtype: int64


Unnamed: 0,text,class,label
0,!!! RT @mayasolovely: As a woman you shouldn't...,2,2
1,!!!!! RT @mleew17: boy dats cold...tyga dwn ba...,1,1
2,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...,1,1
3,!!!!!!!!! RT @C_G_Anderson: @viva_based she lo...,1,1
4,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you...,1,1


## Split Dataset

In [4]:
train_df, test_df = train_test_split(df, test_size=0.15, stratify=df['label'], random_state=42)
train_df, val_df  = train_test_split(train_df, test_size=0.15, stratify=train_df['label'], random_state=42)

print("Dataset sizes:", len(train_df), len(val_df), len(test_df))

Dataset sizes: 17905 3160 3718


## Convert to Hugging Face Datasets

In [5]:
train_ds = Dataset.from_pandas(train_df[['text','label']])
val_ds   = Dataset.from_pandas(val_df[['text','label']])
test_ds  = Dataset.from_pandas(test_df[['text','label']])

## Tokenization

In [6]:
MODEL = 'distilbert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(MODEL)

def tok(batch):
    return tokenizer(batch['text'], truncation=True, padding='max_length', max_length=256)

train_ds = train_ds.map(tok, batched=True)
val_ds   = val_ds.map(tok, batched=True)
test_ds  = test_ds.map(tok, batched=True)

columns = ['input_ids','attention_mask','label']
train_ds.set_format(type='torch', columns=columns)
val_ds.set_format(type='torch', columns=columns)
test_ds.set_format(type='torch', columns=columns)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/17905 [00:00<?, ? examples/s]

Map:   0%|          | 0/3160 [00:00<?, ? examples/s]

Map:   0%|          | 0/3718 [00:00<?, ? examples/s]

## Define Model

In [7]:
model = AutoModelForSequenceClassification.from_pretrained(MODEL, num_labels=3)

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Training Setup

In [8]:
def compute_metrics(pred):
    labels = pred.label_ids
    preds = np.argmax(pred.predictions, axis=1)
    p, r, f, _ = precision_recall_fscore_support(labels, preds, average='weighted')
    acc = accuracy_score(labels, preds)
    return {'precision': p, 'recall': r, 'f1': f, 'accuracy': acc}

training_args = TrainingArguments(
    output_dir='./out',
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    num_train_epochs=3,
    eval_strategy='epoch',
    save_strategy='epoch',
    logging_steps=50,
    fp16=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    compute_metrics=compute_metrics
)

## Train Model

In [9]:
trainer.train()

  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33myashchandarana6733[0m ([33myashchandarana6733-roundpixel[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.2593,0.232462,0.884646,0.910443,0.892084,0.910443
2,0.2227,0.234268,0.893144,0.913291,0.896693,0.913291
3,0.149,0.248025,0.908057,0.909494,0.908751,0.909494


TrainOutput(global_step=840, training_loss=0.23247511330105008, metrics={'train_runtime': 1254.488, 'train_samples_per_second': 42.818, 'train_steps_per_second': 0.67, 'total_flos': 3557806606702080.0, 'train_loss': 0.23247511330105008, 'epoch': 3.0})

In [10]:
# Save the trained model
trainer.save_model("./hate_speech_model")

## Evaluate on Test Set

In [11]:
metrics = trainer.predict(test_ds)
print(metrics.metrics)

{'test_loss': 0.2438078671693802, 'test_precision': 0.9170285260229512, 'test_recall': 0.9209252286175363, 'test_f1': 0.9186186433772462, 'test_accuracy': 0.9209252286175363, 'test_runtime': 29.698, 'test_samples_per_second': 125.194, 'test_steps_per_second': 1.987}


## Inference Demo

In [19]:
import torch

# Get the device your model is on
device = next(model.parameters()).device

def predict(text):
    inputs = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=256
    ).to(device)

    outputs = model(**inputs)
    pred = outputs.logits.argmax(dim=1).item()

    labels = {0: 'hate_speech', 1: 'offensive_language', 2: 'neither'}
    return labels[pred]

print(predict("You are such an idiot!"))
print(predict("Those immigrants are destroying our country"))
print(predict("I had a great day with my family!"))


offensive_language
hate_speech
neither


In [13]:
import shutil
import os
from google.colab import files

# Compress the directory
output_filename = 'offensive_compressed'
shutil.make_archive(output_filename, 'zip', '/content/out')

# Download the compressed file
files.download(f'{output_filename}.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>