# AI Content Moderation Using BERT

This notebook trains a BERT-based model to classify tweets as Hate Speech, Offensive Language, or Neither, using the Davidson Hate Speech and Offensive Language dataset.


In [None]:
# Install libraries (if needed). Prefer %pip over !pip so the packages are installed
# into the environment of the running kernel; uncomment the line below to use it.
# %pip install transformers torch scikit-learn pandas matplotlib seaborn


In [None]:
# Import libraries
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer, BertForSequenceClassification, AdamW, get_scheduler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Load the cleaned data
# pandas reads blank CSV fields as NaN (a float). If any tweet was reduced to an
# empty string during cleaning, that NaN would later reach the tokenizer via
# `X_train.tolist()` and be rejected because it is not a str. Dropping those rows
# here keeps every later cell working on text only. The load and the filter are a
# single assignment, so re-running this cell always gives the same `df`.
df = (
    pd.read_csv('data/cleaned_data.csv')
    .dropna(subset=['cleaned_tweet'])
    .reset_index(drop=True)
)
df.head()


In [None]:
# Train/Test split
# Hold out 20% of the rows for testing. Stratifying on the label column keeps the
# class proportions the same in both splits; the fixed seed makes the split repeatable.
tweet_text = df['cleaned_tweet']
tweet_class = df['class']

X_train, X_test, y_train, y_test = train_test_split(
    tweet_text,
    tweet_class,
    test_size=0.2,
    random_state=42,
    stratify=tweet_class,
)

print(f"Train samples: {len(X_train)}, Test samples: {len(X_test)}")


In [None]:
# Tokenization
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

# Both splits are encoded with exactly the same settings: sequences are truncated
# and padded to 128 tokens and returned as PyTorch tensors.
encode_options = dict(
    truncation=True,
    padding='max_length',
    max_length=128,
    return_tensors="pt",
)

train_encodings = tokenizer(X_train.tolist(), **encode_options)
test_encodings = tokenizer(X_test.tolist(), **encode_options)

# Labels are converted from pandas Series to tensors, in the same row order as the texts.
train_labels, test_labels = (
    torch.tensor(label_series.tolist()) for label_series in (y_train, y_test)
)


In [None]:
# Dataset Class
class HateSpeechDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with their labels.

    Args:
        encodings: mapping from field name to an indexable container holding
            one entry per example (anything exposing ``.items()``).
        labels: indexable, sized container with one label per example.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The number of examples is defined by the labels container.
        return len(self.labels)

    def __getitem__(self, idx):
        # Take example `idx` from every encoding field, then attach its label
        # under the 'labels' key.
        sample = {}
        for field_name, field_values in self.encodings.items():
            sample[field_name] = field_values[idx]
        sample['labels'] = self.labels[idx]
        return sample

# Wrap each split's encodings and labels so they can be served by a DataLoader.
train_dataset = HateSpeechDataset(encodings=train_encodings, labels=train_labels)
test_dataset = HateSpeechDataset(encodings=test_encodings, labels=test_labels)


In [None]:
# Model Loading
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Pretrained BERT encoder with a 3-way classification head.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)
model.to(device)


In [None]:
# Optimizer and Scheduler
BATCH_SIZE = 16
NUM_EPOCHS = 3  # must equal the number of epochs the training loop runs

# The loaders are built first so the schedule length can be derived from the number
# of batches that will actually be produced.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

# transformers.AdamW is deprecated in favour of torch.optim.AdamW. eps and
# weight_decay are passed explicitly with the values transformers.AdamW used by
# default (1e-6 and 0.0), so the optimisation behaves as before. The AdamW name
# imported from transformers is no longer needed by this cell.
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5, eps=1e-6, weight_decay=0.0)

# The scheduler is stepped once per batch. len(train_loader) includes the final
# partial batch, whereas `len(train_dataset) // 16` floors it away, which made the
# linear decay reach a learning rate of zero before the last updates were applied.
num_training_steps = len(train_loader) * NUM_EPOCHS
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps
)


In [None]:
# Training Loop
def _train_step(model, batch, optimizer, lr_scheduler, device):
    """Run one optimisation step: forward pass, backward pass, parameter update.

    The batch tensors are moved to `device`, the loss returned by the model is
    back-propagated, then the optimizer and the scheduler each advance by one
    step and the gradients are cleared for the next batch.
    """
    on_device = {name: tensor.to(device) for name, tensor in batch.items()}
    loss = model(**on_device).loss
    loss.backward()

    optimizer.step()
    lr_scheduler.step()
    optimizer.zero_grad()


model.train()
for epoch in range(3):
    for batch in train_loader:
        _train_step(model, batch, optimizer, lr_scheduler, device)

    print(f"Epoch {epoch+1} complete")


In [None]:
# Evaluation
model.eval()
all_preds, all_labels = [], []

# Gradients are not needed for inference.
with torch.no_grad():
    for batch in test_loader:
        batch = {name: tensor.to(device) for name, tensor in batch.items()}
        logits = model(**batch).logits
        # The predicted class is the index of the largest logit in each row.
        preds = logits.argmax(dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(batch['labels'].cpu().numpy())

acc = accuracy_score(all_labels, all_preds)
# The fourth returned value (support) is not reported, so only the first three are kept.
precision, recall, f1 = precision_recall_fscore_support(
    all_labels, all_preds, average='weighted'
)[:3]

print(f"Accuracy: {acc:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


In [None]:
# Confusion Matrix
cm = confusion_matrix(all_labels, all_preds)

# Draw on an explicit Axes rather than relying on pyplot's implicit current figure.
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix')
plt.show()


In [None]:
# Conclusion

- Final Model Accuracy: ~91.6%
- Fine-tuning BERT improved content moderation by capturing social context in tweets.
- Ethical reflection: Bias risks from imbalanced datasets, importance of careful cleaning and threshold setting.

---
