# AI Text Detector - DistilBERT Model Training

This notebook trains a DistilBERT-based model for AI text detection.

## Model: DistilBERT-base-uncased
- **Strengths**: Faster inference, smaller size (about 40% fewer parameters than BERT-base: ~66M vs. ~110M)
- **Use Case**: Real-time applications with speed requirements
- **Architecture**: Distilled version of BERT, maintaining 97% of performance

In [None]:
# Import Required Libraries
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import (
    DistilBertTokenizer, DistilBertForSequenceClassification,
    TrainingArguments, Trainer, EvalPrediction
)
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.auto import tqdm
import time
import warnings
warnings.filterwarnings('ignore')

# Select the compute device: CUDA when PyTorch can see a GPU, otherwise CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# On a GPU run, also report device 0's name and its total memory
# (bytes divided by 1024**3).
if device.type == 'cuda':
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / (1 << 30):.1f} GB")

In [None]:
# Read the essay corpus from disk and pull out the model inputs and targets.
print("Loading dataset...")
df = pd.read_csv('../Training_Essay_Data.csv')

# Quick sanity report: overall shape and how many rows carry each label value.
print(f"Dataset shape: {df.shape}", "Class distribution:", sep="\n")
print(df['generated'].value_counts())

# 'text' supplies the inputs and 'generated' the class labels; both are
# converted to plain Python lists for the splitting step.
texts, labels = (df[column].tolist() for column in ('text', 'generated'))

print(f"\nTotal samples: {len(texts)}")
print(f"Sample text length: {len(texts[0])} characters")

In [None]:
# Two-stage stratified split: 70% train, then the remaining 30% is divided
# into validation and test. The fixed random_state keeps the partition
# reproducible (noted originally as the same split as RoBERTa for a fair
# comparison).
X_train, X_temp, y_train, y_temp = train_test_split(
    texts,
    labels,
    test_size=0.3,
    stratify=labels,
    random_state=42,
)

# Halve the hold-out portion into validation and test parts.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    stratify=y_temp,
    random_state=42,
)

print(
    f"Training samples: {len(X_train)}",
    f"Validation samples: {len(X_val)}",
    f"Test samples: {len(X_test)}",
    sep="\n",
)

# Per-class counts (label 0 reported as Human, label 1 as AI) for the
# training and validation splits.
print(
    "\nTraining set distribution:",
    f"Human: {y_train.count(0)}, AI: {y_train.count(1)}",
    "Validation set distribution:",
    f"Human: {y_val.count(0)}, AI: {y_val.count(1)}",
    sep="\n",
)

In [None]:
# Initialize the DistilBERT tokenizer and a 2-class sequence classifier.
MODEL_NAME = 'distilbert-base-uncased'
MAX_LENGTH = 512  # DistilBERT's maximum sequence length

print(f"Loading {MODEL_NAME} tokenizer and model...")
tokenizer = DistilBertTokenizer.from_pretrained(MODEL_NAME)
model = DistilBertForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=2,  # binary task: human vs. AI-generated
    # Only the logits are needed, so skip the extra per-layer outputs.
    output_attentions=False,
    output_hidden_states=False
)

print("Model loaded successfully!")
print(f"Model parameters: {model.num_parameters():,}")
# DistilBERT has roughly 66M parameters against roughly 110M for BERT-base,
# i.e. about 40% smaller; "66" is the parameter count in millions, not a
# percentage.
print("Model size compared to BERT: ~40% smaller")

In [None]:
# Create custom dataset class
class AIDetectionDataset(Dataset):
    """Map-style dataset that tokenizes one text per lookup.

    Args:
        texts: Indexable collection of texts; each item is passed through
            ``str()`` before tokenization.
        labels: Indexable collection of class ids, aligned with ``texts``.
        tokenizer: Callable invoked with truncation, ``'max_length'`` padding
            and ``return_tensors='pt'``; its result must expose
            ``'input_ids'`` and ``'attention_mask'`` tensors.
        max_length: Length every sequence is padded/truncated to.
    """

    def __init__(self, texts, labels, tokenizer, max_length):
        self.texts, self.labels = texts, labels
        self.tokenizer, self.max_length = tokenizer, max_length

    def __len__(self):
        """Return the number of texts."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the flattened encoding and the long-typed label for ``idx``."""
        raw_text = str(self.texts[idx])
        target = self.labels[idx]

        encoded = self.tokenizer(
            raw_text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )

        # The tokenizer output is collapsed to 1-D so each item is a flat
        # sequence rather than a batch of one.
        item = {
            field: encoded[field].reshape(-1)
            for field in ('input_ids', 'attention_mask')
        }
        item['labels'] = torch.tensor(target, dtype=torch.long)
        return item

# Wrap each split in an AIDetectionDataset; all three share the same
# tokenizer and MAX_LENGTH.
print("Creating datasets...")
train_dataset, val_dataset, test_dataset = (
    AIDetectionDataset(split_texts, split_labels, tokenizer, MAX_LENGTH)
    for split_texts, split_labels in (
        (X_train, y_train),
        (X_val, y_val),
        (X_test, y_test),
    )
)

print("Datasets created successfully!")

In [None]:
# Define metrics function
def compute_metrics(eval_pred: EvalPrediction):
    """Score one evaluation pass.

    Args:
        eval_pred: Pair unpacked as ``(scores, labels)``; every row of the
            scores is reduced to a class id with an argmax over axis 1.

    Returns:
        Dict with ``accuracy`` plus ``f1``, ``precision`` and ``recall``
        computed with ``average='weighted'``.
    """
    scores, references = eval_pred
    predicted = np.argmax(scores, axis=1)

    accuracy = accuracy_score(references, predicted)
    precision, recall, f1, _ = precision_recall_fscore_support(
        references, predicted, average='weighted'
    )

    return dict(accuracy=accuracy, f1=f1, precision=precision, recall=recall)


print("Metrics function defined.")

In [None]:
# Training configuration for the DistilBERT fine-tuning run, grouped by theme.
training_args = TrainingArguments(
    # --- output locations ---
    output_dir='../models/distilbert_results',
    logging_dir='../models/distilbert_logs',
    push_to_hub=False,
    # --- optimization ---
    num_train_epochs=3,
    learning_rate=2e-5,
    warmup_steps=500,
    weight_decay=0.01,
    per_device_train_batch_size=16,  # larger batch is affordable with the smaller model
    per_device_eval_batch_size=32,
    fp16=torch.cuda.is_available(),  # mixed precision only when a GPU is available
    # --- logging, evaluation and checkpoint cadence (in steps) ---
    logging_steps=100,
    # NOTE(review): recent transformers releases spell this `eval_strategy`;
    # confirm against the installed version.
    evaluation_strategy="steps",
    eval_steps=500,
    save_steps=1000,
    save_total_limit=2,
    # --- model selection: reload the checkpoint with the highest accuracy ---
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    # --- data loading ---
    dataloader_num_workers=0,  # 0 for Windows compatibility
)

print(
    "Training arguments configured.",
    f"Batch size: {training_args.per_device_train_batch_size} (larger than RoBERTa due to smaller model)",
    sep="\n",
)

In [None]:
# Assemble the Trainer from the model, its arguments, the train/validation
# datasets and the metric callback.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    eval_dataset=val_dataset,
    train_dataset=train_dataset,
)

print(
    "Trainer initialized. Starting training...",
    "DistilBERT should train faster than RoBERTa (expect 20-40 minutes).",
    sep="\n",
)

# Wall-clock the whole fine-tuning run.
start_time = time.time()
training_result = trainer.train()
end_time = time.time()
training_time = end_time - start_time

print("Training completed!")
print(f"Training loss: {training_result.training_loss:.4f}")
print(f"Training time: {training_time/60:.1f} minutes")

In [None]:
# Score the test split with the trainer and time the pass.
print("Evaluating on test set...")
start_eval = time.time()
test_results = trainer.evaluate(test_dataset)
end_eval = time.time()
eval_time = end_eval - start_eval

print("\nTest Results:")
for key, value in test_results.items():
    if not key.startswith('eval_'):
        continue  # report only the entries carrying the 'eval_' prefix
    metric_name = key.replace('eval_', '').title()
    print(f"{metric_name}: {value:.4f}")

# Throughput is derived from the wall-clock time measured above.
print(f"\nEvaluation time: {eval_time:.2f} seconds")
print(f"Speed: {len(X_test)/eval_time:.1f} samples/second")

In [None]:
# Run the trainer over the test split and keep the outputs for analysis.
predictions = trainer.predict(test_dataset)
y_true = predictions.label_ids
y_pred = np.argmax(predictions.predictions, axis=1)  # highest-scoring class per row

# Per-class precision/recall/F1, printed with four decimals.
print("\nDetailed Classification Report:")
print(
    classification_report(
        y_true,
        y_pred,
        digits=4,
        target_names=['Human', 'AI Generated'],
    )
)

# Confusion matrix of true vs. predicted labels.
cm = confusion_matrix(y_true, y_pred)
print("\nConfusion Matrix:", cm, sep="\n")

In [None]:
# Draw the confusion matrix as an annotated heat map.
plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Greens',
    xticklabels=['Human', 'AI Generated'],
    yticklabels=['Human', 'AI Generated'],
)
plt.title('DistilBERT Model - Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.tight_layout()
plt.show()

# Unpack the 2x2 matrix row by row: first row -> (tn, fp), second -> (fn, tp),
# treating label 1 (AI Generated) as the positive class.
(tn, fp), (fn, tp) = cm
sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)
precision = tp / (tp + fp)
f1 = 2 * (precision * sensitivity) / (precision + sensitivity)

print(
    "\nAdditional Metrics:",
    f"Sensitivity (Recall): {sensitivity:.4f}",
    f"Specificity: {specificity:.4f}",
    f"Precision: {precision:.4f}",
    f"F1-Score: {f1:.4f}",
    sep="\n",
)

In [None]:
# Inference speed benchmark
def benchmark_inference(model, tokenizer, test_texts, device, num_samples=100, max_length=512):
    """Time one-at-a-time inference over the first ``num_samples`` texts.

    Each text is tokenized (padded/truncated to ``max_length``), moved to
    ``device`` and pushed through ``model`` individually, so the measurement
    covers tokenization, the transfer to ``device`` and the forward pass.

    Args:
        model: Module called as ``model(input_ids=..., attention_mask=...)``;
            it is moved to ``device`` and switched to eval mode.
        tokenizer: Callable returning a mapping with ``'input_ids'`` and
            ``'attention_mask'`` tensors.
        test_texts: Sliceable sequence of texts; only the leading
            ``num_samples`` items are used (no random sampling).
        device: Target device (``torch.device`` or device string).
        num_samples: Upper bound on the number of texts to time.
        max_length: Padding/truncation length passed to the tokenizer.

    Returns:
        ``(total_seconds, seconds_per_sample)``, where the average is taken
        over the number of texts actually processed.

    Raises:
        ValueError: If there is no text to benchmark.
    """
    sample_texts = test_texts[:num_samples]
    processed = len(sample_texts)
    if processed == 0:
        raise ValueError("benchmark_inference needs at least one text to time")

    model.to(device)
    model.eval()

    # CUDA kernels run asynchronously; without synchronizing, the clock could
    # stop before the queued forward passes have finished.
    needs_sync = torch.device(device).type == 'cuda'
    if needs_sync:
        torch.cuda.synchronize(device)

    start_time = time.perf_counter()

    with torch.no_grad():
        for text in sample_texts:
            encoding = tokenizer(
                text,
                truncation=True,
                padding='max_length',
                max_length=max_length,
                return_tensors='pt'
            )

            input_ids = encoding['input_ids'].to(device)
            attention_mask = encoding['attention_mask'].to(device)

            # Only the elapsed time matters, so the output is discarded.
            model(input_ids=input_ids, attention_mask=attention_mask)

    if needs_sync:
        torch.cuda.synchronize(device)

    total_time = time.perf_counter() - start_time

    # Divide by the real sample count: test_texts may hold fewer than
    # num_samples items.
    return total_time, total_time / processed

# Time inference on test texts (capped at 100) and report latency/throughput.
print("Benchmarking inference speed...")
total_time, avg_time = benchmark_inference(model, tokenizer, X_test, device, num_samples=100)

print(
    "\nInference Speed Benchmark (100 samples):",
    f"Total time: {total_time:.2f} seconds",
    f"Average time per sample: {avg_time*1000:.1f} ms",
    sep="\n",
)
print(f"Samples per second: {1/avg_time:.1f}")

In [None]:
# Write the trained model and the tokenizer files into the same directory.
model_save_path = '../models/distilbert_ai_detector'
trainer.save_model(model_save_path)
tokenizer.save_pretrained(model_save_path)

print(f"Model saved to: {model_save_path}")

import json

# Summary of the run: test metrics, confusion matrix, dataset sizes, and the
# timing figures measured earlier in the notebook.
results_dict = {
    'model_name': 'DistilBERT-base-uncased',
    # test_<metric> entries are copied from the matching eval_<metric> keys.
    **{
        f'test_{metric}': float(test_results[f'eval_{metric}'])
        for metric in ('accuracy', 'f1', 'precision', 'recall')
    },
    'confusion_matrix': cm.tolist(),
    'training_samples': len(X_train),
    'test_samples': len(X_test),
    'training_time_minutes': training_time / 60,
    'inference_time_ms': avg_time * 1000,
    'samples_per_second': 1 / avg_time,
    'model_parameters': model.num_parameters(),
}

with open('../models/distilbert_results.json', 'w') as f:
    json.dump(results_dict, f, indent=2)

print("Results saved to '../models/distilbert_results.json'")

In [None]:
# Test with sample predictions
def predict_text(text, model, tokenizer, device, max_length=512):
    """Classify a single text and return the winning class with its scores.

    Only the tokenized inputs are moved to ``device`` here; the model itself
    is not moved by this function.

    Args:
        text: Text to classify.
        model: Callable as ``model(input_ids=..., attention_mask=...)`` whose
            result exposes ``.logits``; it is switched to eval mode.
        tokenizer: Callable returning ``'input_ids'`` and ``'attention_mask'``
            tensors.
        device: Device the input tensors are moved to.
        max_length: Padding/truncation length passed to the tokenizer.

    Returns:
        ``(class_index, probabilities)``: the argmax class as a Python
        number and the softmax scores of the first row as a NumPy array.
    """
    model.eval()

    batch = tokenizer(
        text,
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_tensors='pt',
    )
    ids, mask = (batch[name].to(device) for name in ('input_ids', 'attention_mask'))

    # Gradients are not needed for inference.
    with torch.no_grad():
        logits = model(input_ids=ids, attention_mask=mask).logits
        scores = logits.softmax(dim=-1)
        winner = scores.argmax(dim=-1)

    return winner.item(), scores[0].cpu().numpy()

# Smoke-test the classifier on one example of each class.
# NOTE(review): both samples are drawn from the full `df`, so they may have
# been part of the training split; treat this as a sanity check rather than
# an evaluation.
model.to(device)

# Sample AI text: first row labelled generated == 1, cut to 500 characters.
ai_sample = df[df['generated'] == 1]['text'].iloc[0][:500]
pred, probs = predict_text(ai_sample, model, tokenizer, device)
print("Sample AI Text Prediction:")
print(f"Predicted: {'AI Generated' if pred == 1 else 'Human'}")
print(f"Confidence: {probs[pred]:.4f}")
print(f"Probabilities: Human={probs[0]:.4f}, AI={probs[1]:.4f}")

print("\n" + "="*50 + "\n")

# Sample human text: first row labelled generated == 0, cut to 500 characters.
human_sample = df[df['generated'] == 0]['text'].iloc[0][:500]
pred, probs = predict_text(human_sample, model, tokenizer, device)
print("Sample Human Text Prediction:")
print(f"Predicted: {'AI Generated' if pred == 1 else 'Human'}")
print(f"Confidence: {probs[pred]:.4f}")
print(f"Probabilities: Human={probs[0]:.4f}, AI={probs[1]:.4f}")

print("\nDistilBERT model training completed successfully!")
print("\nKey Advantages of DistilBERT:")
print("- Faster training and inference")
# DistilBERT has roughly 66M parameters against roughly 110M for BERT-base,
# i.e. about 40% fewer; "66" is the parameter count in millions, not a
# percentage.
print("- Smaller model size (about 40% fewer parameters than BERT-base)")
print("- Good performance retention")
print("- Better for real-time applications")