## Hate Speech Detection Using BERT

This notebook demonstrates how to use a pre-trained BERT model to detect hate speech in social media posts.
We will:

1. Load and preprocess a dataset of social media posts.
2. Fine-tune a pre-trained BERT model using `transformers` and `torch`.
3. Evaluate the model's performance.
4. Visualize classification results.

### Dataset
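The dataset (`sample_twitter_posts.csv`) consists of 1,000 tweets. Each row holds the tweet text in the `post` column and a `label` column, where the labels are: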
- `1`: Hate speech
- `0`: Non-hate speech

Let's begin!


In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from transformers import BertTokenizer, BertForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import warnings

# Silence all warnings so cell output stays readable.
# NOTE(review): this is a blanket filter; narrow it when debugging library issues.
warnings.filterwarnings("ignore")

# Seed the random number generators up front so that a Restart & Run All
# draws the same random numbers each time (e.g. for batch shuffling).
# NOTE(review): some GPU kernels may still be non-deterministic.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


In [None]:
# Load dataset
df = pd.read_csv("sample_twitter_posts.csv")

# Fail fast with a readable message if the columns used by later cells
# ("post" for the text, "label" for the target) are not in the file.
missing_columns = {"post", "label"} - set(df.columns)
if missing_columns:
    raise ValueError(
        f"sample_twitter_posts.csv is missing required column(s): {sorted(missing_columns)}"
    )

# Display first few rows
df.head()


In [None]:
# Fetch the tokenizer that belongs to the checkpoint named below
TOKENIZER_CHECKPOINT = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(TOKENIZER_CHECKPOINT)

# Tokenization function
def tokenize_data(texts, labels, tokenizer, max_len=128):
    """Tokenize a collection of texts and hand back the labels as a plain list.

    Args:
        texts: Object exposing ``.tolist()`` that yields the raw texts.
        labels: Object exposing ``.tolist()`` that yields the matching labels.
        tokenizer: Callable invoked with the text list and the keyword
            arguments ``truncation``, ``padding`` and ``max_length``.
        max_len: Value forwarded to the tokenizer as ``max_length``.

    Returns:
        A tuple ``(encodings, labels)``: the tokenizer's return value and the
        labels converted with ``.tolist()``.
    """
    raw_texts = texts.tolist()
    tokenizer_options = {"truncation": True, "padding": True, "max_length": max_len}
    encoded = tokenizer(raw_texts, **tokenizer_options)
    label_list = labels.tolist()
    return encoded, label_list

# Split first, then tokenize each split separately.
# stratify=df["label"] keeps the proportion of each label the same in the
# training and test splits, so the held-out metrics are not skewed by an
# unlucky draw of the rarer class.
X_train, X_test, y_train, y_test = train_test_split(
    df["post"],
    df["label"],
    test_size=0.2,
    random_state=42,
    stratify=df["label"],
)
train_encodings, train_labels = tokenize_data(X_train, y_train, tokenizer)
test_encodings, test_labels = tokenize_data(X_test, y_test, tokenizer)


In [None]:
class HateSpeechDataset(Dataset):
    """Dataset that serves one tokenized example (plus its label) per index."""

    def __init__(self, encodings, labels):
        """Store the inputs as given.

        Args:
            encodings: Mapping of field name to a per-example indexable
                collection (accessed via ``.items()`` and ``values[idx]``).
            labels: Indexable, sized collection of labels.
        """
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        """Return the number of labels."""
        example_count = len(self.labels)
        return example_count

    def __getitem__(self, idx):
        """Build a dict of tensors for example ``idx``, with the label under "labels"."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample["labels"] = torch.tensor(self.labels[idx])
        return sample

# Wrap each tokenized split in a HateSpeechDataset
train_dataset = HateSpeechDataset(encodings=train_encodings, labels=train_labels)
test_dataset = HateSpeechDataset(encodings=test_encodings, labels=test_labels)

# Batch both splits in groups of 8; only the training loader shuffles
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=8,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=8,
    shuffle=False,
)


In [None]:
# Build the two-label classifier from the pre-trained checkpoint and move it to the chosen device
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=2,
)
model = model.to(device)

# Loss function and optimizer used by the training loop
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.AdamW(params=model.parameters(), lr=5e-5)


In [None]:
# Training function
def train(model, data_loader, optimizer, loss_fn, device, epochs=3):
    """Train ``model`` on ``data_loader`` and print the mean batch loss after every epoch.

    Args:
        model: Module called as ``model(input_ids, attention_mask=...)`` whose
            result exposes a ``.logits`` attribute.
        data_loader: Sized iterable of dict batches holding the tensors
            "input_ids", "attention_mask" and "labels".
        optimizer: Optimizer stepped once per batch.
        loss_fn: Callable ``loss_fn(logits, labels)`` returning a scalar tensor.
        device: Device every batch tensor is moved to before the forward pass.
        epochs: Number of full passes over ``data_loader``.

    Raises:
        ValueError: If ``data_loader`` contains no batches (the mean loss
            would otherwise divide by zero).
    """
    num_batches = len(data_loader)
    if num_batches == 0:
        raise ValueError("data_loader is empty; there is nothing to train on")

    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for batch in data_loader:
            optimizer.zero_grad()

            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            loss = loss_fn(outputs.logits, labels)
            loss.backward()
            optimizer.step()

            # .item() detaches the scalar so the running sum does not keep the graph alive
            total_loss += loss.item()
        print(f"Epoch {epoch+1}: Loss = {total_loss / num_batches}")

# Run the training loop on the training batches (default number of epochs)
train(
    model=model,
    data_loader=train_loader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    device=device,
)


In [None]:
# Evaluation function
def evaluate(model, data_loader, device):
    """Predict over ``data_loader`` without gradients, print a classification report, return the labels.

    Args:
        model: Module called as ``model(input_ids, attention_mask=...)`` whose
            result exposes a ``.logits`` attribute; it is switched to eval mode.
        data_loader: Iterable of dict batches holding the tensors
            "input_ids", "attention_mask" and "labels".
        device: Device every batch tensor is moved to before the forward pass.

    Returns:
        A tuple ``(true_labels, preds)`` of lists, accumulated in batch order.
    """
    model.eval()
    predicted, expected = [], []
    with torch.no_grad():
        for batch in data_loader:
            ids = batch["input_ids"].to(device)
            mask = batch["attention_mask"].to(device)
            targets = batch["labels"].to(device)

            logits = model(ids, attention_mask=mask).logits
            # the predicted class is the index of the largest logit in each row
            batch_preds = torch.argmax(logits, dim=1)

            predicted.extend(batch_preds.cpu().numpy())
            expected.extend(targets.cpu().numpy())

    report = classification_report(expected, predicted)
    print(report)
    return expected, predicted

# Score the fine-tuned model on the held-out batches and keep both label lists for plotting
true_labels, preds = evaluate(
    model=model,
    data_loader=test_loader,
    device=device,
)


In [None]:
# Confusion Matrix
# labels=[0, 1] pins the row/column order to the tick labels used below and
# keeps the matrix 2x2 even when one class is absent from the evaluated data.
cm = confusion_matrix(true_labels, preds, labels=[0, 1])
plt.figure(figsize=(6, 5))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=["Non-Hate", "Hate"],
    yticklabels=["Non-Hate", "Hate"],
)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

# Hate Speech Distribution
# value_counts() orders its result by frequency, not by label, so index it
# explicitly as [0, 1]: bar 0 is then always Non-Hate (blue) and bar 1 always
# Hate (red), matching the hard-coded tick labels whichever class is larger.
label_counts = df["label"].value_counts().reindex([0, 1], fill_value=0)

# Draw on a fresh figure so the bars never land on a previously active axes
fig, ax = plt.subplots()
label_counts.plot(kind="bar", color=["blue", "red"], ax=ax)
plt.xticks(ticks=[0, 1], labels=["Non-Hate", "Hate"], rotation=0)
plt.ylabel("Count")
plt.title("Distribution of Hate Speech Labels")
plt.show()
