In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the raw CSV and discard the 'annotaters' column in one expression;
# errors='ignore' turns the drop into a no-op when that column is absent.
df = pd.read_csv('dataset.csv').drop(columns=['annotaters'], errors='ignore')

In [2]:
def preprocess_data(df):
    """Return a copy of ``df`` with integer-encoded ``hate`` and ``target`` columns.

    * ``hate``   - ``label`` mapped through ``{'h': 1, 'nh': 0}`` after trimming
      whitespace and lower-casing. Labels outside that mapping stay NaN.
    * ``target`` - ``target`` mapped through ``{'p': 0, 'e': 1, 'r': 2}`` after
      the same normalisation; every other value (including missing ones)
      becomes ``-100``.

    Rows labelled hate whose target could not be mapped are re-labelled as
    non-hate (``hate = 0``). The input frame is not modified.
    """
    df_clean = df.copy()

    # Normalise the label the same way as the target so values such as ' H '
    # or 'NH' are not silently turned into NaN by the mapping.
    label_norm = df_clean['label'].astype(str).str.strip().str.lower()
    df_clean['hate'] = label_norm.map({'h': 1, 'nh': 0})  # binary hate, non-hate

    target_map = {'p': 0, 'e': 1, 'r': 2}  # numeric mapping of target categories
    # astype(str) keeps the .str accessor usable even when the column holds no
    # strings at all (e.g. an all-NaN float column).
    target_norm = df_clean['target'].astype(str).str.lower().str.strip()

    # .map already yields NaN for anything that is not a key of target_map,
    # so a single fillna is enough to assign the -100 sentinel.
    df_clean['target'] = target_norm.map(target_map).fillna(-100).astype(int)

    # A hate row without a usable target is demoted to non-hate.
    invalid_hate_mask = (df_clean['hate'] == 1) & (df_clean['target'] == -100)
    df_clean.loc[invalid_hate_mask, 'hate'] = 0

    return df_clean

In [3]:
# Apply preprocess_data to the loaded frame; preprocess_data works on a copy, so `df` itself is left as loaded.
df_clean = preprocess_data(df)

In [4]:
import torch
from transformers import AutoTokenizer, AutoModel, AdamW
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report
import numpy as np

class TurkishHateSpeechDataset(Dataset):
    """Map-style dataset that tokenizes one text per item and attaches its two labels.

    Each item is a dict with ``input_ids`` and ``attention_mask`` (flattened
    tokenizer outputs), ``hate_labels`` (float tensor) and ``target_labels``
    (long tensor).
    """

    def __init__(self, texts, hate_labels, target_labels, tokenizer, max_len=128):
        # The three sequences are indexed in parallel by __getitem__.
        self.texts, self.hate_labels, self.target_labels = texts, hate_labels, target_labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # Pad/truncate every text to max_len so items share one length.
        tokenized = self.tokenizer(
            str(self.texts[idx]),
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )

        item = {
            # The tokenizer is asked for 'pt' tensors; flatten them to 1-D.
            name: tokenized[name].reshape(-1)
            for name in ('input_ids', 'attention_mask')
        }
        item['hate_labels'] = torch.tensor(self.hate_labels[idx], dtype=torch.float)
        item['target_labels'] = torch.tensor(self.target_labels[idx], dtype=torch.long)
        return item

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
class TurkishHateBERT(torch.nn.Module):
    """Pretrained encoder with two linear heads that share its pooled output.

    Args:
        model_name: Checkpoint passed to ``AutoModel.from_pretrained``.
            Defaults to the Turkish cased BERT used by this notebook.
        num_targets: Number of classes produced by the target head.
    """

    def __init__(self, model_name="dbmdz/bert-base-turkish-cased", num_targets=3):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        # Take the head width from the loaded encoder instead of assuming 768,
        # so a checkpoint with a different hidden size still works.
        hidden_size = self.bert.config.hidden_size
        self.hate_head = torch.nn.Linear(hidden_size, 1)
        self.target_head = torch.nn.Linear(hidden_size, num_targets)

    def forward(self, input_ids, attention_mask):
        """Return ``(hate_logits, target_logits)`` computed from the encoder's ``pooler_output``."""
        outputs = self.bert(input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        return self.hate_head(pooled_output), self.target_head(pooled_output)

In [6]:
import nlpaug.augmenter.word as naw
import random

# Contextual word-substitution augmenter; placed on the GPU when torch reports one, otherwise on the CPU.
turkish_augmenter = naw.ContextualWordEmbsAug(
    action="substitute",
    model_path='bert-base-multilingual-cased',
    device='cpu' if not torch.cuda.is_available() else 'cuda',
)

In [None]:
def augment_sentences(df, augmenter, aug_p=0.3, max_aug_per_sample=1, print_examples=False):
    """Append augmented copies of hate rows to ``df``.

    Every row with ``hate == 1`` is selected with probability ``aug_p``; for a
    selected row ``augmenter.augment(text)`` is called ``max_aug_per_sample``
    times and each result becomes a new row that copies all other columns.

    Args:
        df: Frame with at least ``text`` and ``hate`` columns.
        augmenter: Object exposing ``augment(text)`` that returns a string or
            a list of strings (the first element is used).
        aug_p: Per-row probability of augmenting a hate row.
        max_aug_per_sample: Number of augmentations requested per selected row.
        print_examples: When true, print up to 10 original/augmented pairs.

    Returns:
        ``df`` itself when nothing was generated, otherwise a new frame with
        the augmented rows appended and the index reset.
    """
    augmented_rows = []
    printed = 0

    for i, row in df.iterrows():
        # Short-circuit keeps the RNG untouched for non-hate rows.
        if row['hate'] != 1 or random.random() >= aug_p:
            continue
        try:
            for _ in range(max_aug_per_sample):
                aug_text = augmenter.augment(row['text'])
                if isinstance(aug_text, list):
                    if not aug_text:
                        # Augmenter produced nothing for this attempt.
                        continue
                    aug_text = aug_text[0]
                # Plain dict records let pandas infer the column dtypes again.
                augmented_rows.append({**row.to_dict(), 'text': aug_text})

                if print_examples and printed < 10:
                    print(f"Original : {row['text']}")
                    print(f"Augmented: {aug_text}")
                    print("-" * 60)
                    printed += 1
        except Exception as e:
            # Best effort: a failing row is reported and skipped.
            print(f"Augmentation failed for row {i}: {e}")
            continue

    if not augmented_rows:
        return df

    aug_df = pd.DataFrame(augmented_rows, columns=df.columns)
    print(f"Added {len(aug_df)} augmented samples.")
    return pd.concat([df, aug_df], ignore_index=True)


In [None]:
def prepare_loaders_augmented(train_df, val_df, tokenizer, batch_size=16):
    """Wrap the train and validation frames in DataLoaders.

    Both frames must provide ``text``, ``hate`` and ``target`` columns. Only
    the training loader shuffles.

    Returns:
        ``(train_loader, val_loader)``.
    """
    def _to_dataset(frame):
        # Pass the underlying arrays so the dataset indexes by position.
        columns = (frame[name].values for name in ('text', 'hate', 'target'))
        return TurkishHateSpeechDataset(*columns, tokenizer)

    train_dataset, val_dataset = _to_dataset(train_df), _to_dataset(val_df)
    return (
        DataLoader(train_dataset, batch_size=batch_size, shuffle=True),
        DataLoader(val_dataset, batch_size=batch_size),
    )

In [None]:
def add_hard_negatives(model, df_train, tokenizer, device, threshold=0.3, max_add=100,
                       batch_size=32, max_length=128):
    """Append copies of non-hate rows that the model scores as likely hate.

    The model is switched to eval mode and scores every row with ``hate == 0``.
    Rows whose sigmoid hate probability exceeds ``threshold`` are the hard
    negatives; at most ``max_add`` of them (sampled with ``random_state=42``)
    are appended to the training frame.

    Args:
        model: Callable returning ``(hate_logits, target_logits)``.
        df_train: Training frame with ``text`` and ``hate`` columns.
        tokenizer: Callable producing ``input_ids`` / ``attention_mask`` tensors.
        device: Device the model inputs are moved to.
        threshold: Probability above which a non-hate row is a hard negative.
        max_add: Upper bound on the number of rows appended.
        batch_size: Number of texts scored per forward pass.
        max_length: Truncation length handed to the tokenizer.

    Returns:
        A new frame with the same columns as ``df_train`` and a fresh index.
        The model is left in eval mode.
    """
    model.eval()
    non_hate_df = df_train[df_train['hate'] == 0]
    texts = non_hate_df['text'].tolist()

    hate_probs = []
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            batch_texts = texts[start:start + batch_size]
            encodings = tokenizer(batch_texts, return_tensors='pt', padding=True,
                                  truncation=True, max_length=max_length)
            input_ids = encodings['input_ids'].to(device)
            attention_mask = encodings['attention_mask'].to(device)
            hate_logits, _ = model(input_ids, attention_mask)
            hate_probs.extend(torch.sigmoid(hate_logits).detach().cpu().numpy().flatten())

    # Keep the scores in a side array instead of a scratch column, so the
    # returned frame has exactly the columns of df_train.
    is_hard = np.asarray(hate_probs, dtype=np.float32) > threshold
    hard_negatives = non_hate_df[is_hard]
    hard_negatives = hard_negatives.sample(min(len(hard_negatives), max_add), random_state=42)
    print(f"Adding {len(hard_negatives)} hard negatives to training data")

    if hard_negatives.empty:
        # Nothing to append; still hand back a new frame with a fresh index.
        return df_train.reset_index(drop=True)
    return pd.concat([df_train, hard_negatives], ignore_index=True)

In [None]:
def evaluate_model(model, dataloader, device):
    """Run the model over ``dataloader`` and collect labels and predictions.

    Hate predictions threshold the sigmoid of the hate logit at 0.5. Target
    predictions are the arg-max class; target entries whose true label is
    ``-100`` are removed from both target arrays.

    Args:
        model: Callable returning ``(hate_logits, target_logits)``.
        dataloader: Iterable of batches with ``input_ids``, ``attention_mask``,
            ``hate_labels`` and ``target_labels`` tensors.
        device: Device the model inputs are moved to.

    Returns:
        Dict with ``true_hate`` and ``pred_hate`` (lists, one entry per
        sample) and ``true_target`` / ``pred_target`` (NumPy arrays restricted
        to samples that have a target label). The model is left in eval mode.
    """
    model.eval()
    hate_preds, true_hate = [], []
    target_preds, true_target = [], []

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            hate_logits, target_logits = model(input_ids, attention_mask)

            # reshape(-1) keeps a 1-D result even for a batch of one sample;
            # a bare squeeze() would yield a 0-d array that cannot be iterated.
            batch_probs = torch.sigmoid(hate_logits.reshape(-1)).cpu().numpy()
            hate_preds.extend((batch_probs > 0.5).astype(int))
            true_hate.extend(batch['hate_labels'].reshape(-1).cpu().numpy())

            target_probs = torch.softmax(target_logits, dim=1).cpu().numpy()
            target_preds.extend(np.argmax(target_probs, axis=1))
            true_target.extend(batch['target_labels'].reshape(-1).cpu().numpy())

    # -100 marks samples without a target label; drop them from both arrays.
    true_target = np.array(true_target)
    target_mask = true_target != -100
    return {
        'true_hate': true_hate,
        'pred_hate': hate_preds,
        'true_target': true_target[target_mask],
        'pred_target': np.array(target_preds)[target_mask],
    }

In [None]:
def train_model_with_hard_neg(model, train_df, val_df, tokenizer, device, epochs=4, batch_size=16, lr=2e-5, threshold=0.3):
    """Train the two-head model, growing the training set with hard negatives.

    Each epoch rebuilds the loaders, optimises the sum of the hate loss
    (BCE with logits) and the target loss (cross-entropy over samples whose
    target is not ``-100``), prints validation reports, and then extends the
    training frame with hard negatives for the following epoch.

    Args:
        model: Module returning ``(hate_logits, target_logits)``.
        train_df: Training frame with ``text``, ``hate`` and ``target`` columns.
        val_df: Validation frame with the same columns.
        tokenizer: Tokenizer handed to the datasets and to hard-negative mining.
        device: Device used for training and evaluation.
        epochs: Number of passes over the (growing) training frame.
        batch_size: Batch size of both loaders.
        lr: Learning rate of the optimizer.
        threshold: Hate probability above which a non-hate row is a hard negative.

    Returns:
        The trained model (the same object that was passed in).
    """
    optimizer = AdamW(model.parameters(), lr=lr)
    hate_criterion = torch.nn.BCEWithLogitsLoss()
    target_criterion = torch.nn.CrossEntropyLoss(ignore_index=-100)
    model.to(device)

    for epoch in range(epochs):
        # Loaders are rebuilt every epoch because train_df grows between epochs.
        train_loader, val_loader = prepare_loaders_augmented(train_df, val_df, tokenizer, batch_size)
        model.train()
        total_loss = 0.0
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            hate_labels = batch['hate_labels'].to(device)
            target_labels = batch['target_labels'].to(device)
            hate_logits, target_logits = model(input_ids, attention_mask)

            # reshape(-1) keeps the batch dimension for a single-sample batch;
            # squeeze() would produce a 0-d tensor that no longer matches the
            # shape of hate_labels and makes the loss raise.
            hate_loss = hate_criterion(hate_logits.reshape(-1), hate_labels)

            target_mask = (target_labels != -100)
            if target_mask.any():
                target_loss = target_criterion(target_logits[target_mask], target_labels[target_mask])
            else:
                # No labelled targets in this batch: contribute nothing.
                target_loss = torch.tensor(0.0).to(device)

            loss = hate_loss + target_loss
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # max(..., 1) avoids a division by zero on an empty training loader.
        print(f"Epoch {epoch+1}/{epochs} - Train loss: {total_loss/max(len(train_loader), 1):.4f}")

        val_metrics = evaluate_model(model, val_loader, device)
        print("\nValidation Metrics:")
        print(classification_report(val_metrics['true_hate'], val_metrics['pred_hate'], target_names=['Non-Hate', 'Hate']))
        print("\nTarget Classification (Hate Cases Only):")
        if len(val_metrics['true_target']) > 0:
            # Explicit labels keep the report aligned with the three names even
            # when a class is missing from this validation split.
            print(classification_report(
                val_metrics['true_target'],
                val_metrics['pred_target'],
                labels=[0, 1, 2],
                target_names=['Politics', 'Ethnicity', 'Religion'],
                zero_division=0,
            ))
        else:
            print("No validation samples with a target label.")

        # Add hard negatives for next epoch; after the final epoch the result
        # would be discarded, so the extra inference pass is skipped.
        if epoch < epochs - 1:
            train_df = add_hard_negatives(model, train_df, tokenizer, device, threshold=threshold)
    return model

In [None]:
# 5-fold cross-validation: augment each training fold, train a fresh model with
# hard-negative mining, then evaluate it on the untouched validation fold.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-turkish-cased")

from sklearn.model_selection import StratifiedKFold

# Folds are stratified on the binary hate label only.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
fold_metrics = []  # one evaluate_model() result dict per fold

for fold, (train_idx, val_idx) in enumerate(skf.split(df_clean, df_clean['hate'])):
    print(f"\n\n=== Fold {fold+1} ===")
    
    df_train_fold = df_clean.iloc[train_idx].reset_index(drop=True)
    df_val_fold = df_clean.iloc[val_idx].reset_index(drop=True)
    
    # Only the training fold is augmented; validation data stays as-is.
    print("Augmenting training fold...")
    df_train_augmented = augment_sentences(df_train_fold, turkish_augmenter, aug_p=0.3, max_aug_per_sample=1)
    
    # A new model per fold so no weights carry over between folds.
    model_fold = TurkishHateBERT()
    
    print("Training model with hard negatives...")
    trained_model = train_model_with_hard_neg(
        model_fold,
        df_train_augmented,
        df_val_fold,
        tokenizer,
        device,
        epochs=4,
        batch_size=16,
        lr=2e-5,
        threshold=0.3
    )
    
    # Index [1] picks the non-shuffled validation loader from the returned pair.
    val_loader = prepare_loaders_augmented(df_val_fold, df_val_fold, tokenizer, batch_size=16)[1]
    metrics = evaluate_model(trained_model, val_loader, device)
    fold_metrics.append(metrics)
    
    print(f"\nFold {fold+1} Classification Report (Validation):")
    print(classification_report(metrics['true_hate'], metrics['pred_hate'], target_names=['Non-Hate', 'Hate']))
    if len(metrics['true_target']) > 0:
        # Stratification does not cover the target classes, so a fold may lack
        # one of them; explicit labels keep the report aligned with the names.
        print(classification_report(
            metrics['true_target'],
            metrics['pred_target'],
            labels=[0, 1, 2],
            target_names=['Politics', 'Ethnicity', 'Religion'],
            zero_division=0,
        ))
    else:
        print("No validation samples with a target label in this fold.")

In [None]:
from sklearn.metrics import precision_recall_fscore_support

# Per-fold scores for each task; every list gets at most one entry per fold.
hate_precisions, hate_recalls, hate_f1s = [], [], []
target_precisions, target_recalls, target_f1s = [], [], []

for metrics in fold_metrics:
    # Binary precision/recall/F1 for the hate class (label 1).
    hate_p, hate_r, hate_f, _ = precision_recall_fscore_support(
        metrics['true_hate'], metrics['pred_hate'], average='binary', pos_label=1
    )
    hate_precisions.append(hate_p)
    hate_recalls.append(hate_r)
    hate_f1s.append(hate_f)

    # Folds without any valid target label contribute no target scores.
    if len(metrics['true_target']) == 0:
        continue

    # Macro-averaged scores over the target classes.
    tgt_p, tgt_r, tgt_f, _ = precision_recall_fscore_support(
        metrics['true_target'], metrics['pred_target'], average='macro', zero_division=0
    )
    target_precisions.append(tgt_p)
    target_recalls.append(tgt_r)
    target_f1s.append(tgt_f)

# Mean hate-speech scores across folds.
avg_hate_precision, avg_hate_recall, avg_hate_f1 = (
    np.mean(scores) for scores in (hate_precisions, hate_recalls, hate_f1s)
)

# Mean target scores across folds; NaN when no fold produced a target score.
avg_target_precision, avg_target_recall, avg_target_f1 = (
    np.mean(scores) if scores else float('nan')
    for scores in (target_precisions, target_recalls, target_f1s)
)

print("\n=== Average metrics across folds ===")
print(f"Hate Speech Detection - Precision: {avg_hate_precision:.4f}, Recall: {avg_hate_recall:.4f}, F1: {avg_hate_f1:.4f}")
print(f"Target Classification (hate cases) - Precision: {avg_target_precision:.4f}, Recall: {avg_target_recall:.4f}, F1: {avg_target_f1:.4f}")