# AI Text Detector - RoBERTa Model Training

This notebook trains a RoBERTa-based model for AI text detection using your balanced dataset of 23,274 samples.

## Model: RoBERTa-base
- **Strengths**: Excellent text understanding, robust performance
- **Use Case**: High-accuracy AI detection
- **Architecture**: Transformer-based encoder model

In [1]:
# Import Required Libraries
import json
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from tqdm.auto import tqdm
from transformers import (
    RobertaTokenizer, RobertaForSequenceClassification,
    TrainingArguments, Trainer, EvalPrediction
)
warnings.filterwarnings('ignore')

# Select the compute device: CUDA when it is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# When running on a GPU, also report the card name and its total memory
if device.type == 'cuda':
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")

Using device: cpu


In [2]:
# Read the essay corpus and report its size and label balance
print("Loading dataset...")
df = pd.read_csv('../Training_Essay_Data.csv')

print(f"Dataset shape: {df.shape}")
print(f"Class distribution:")
print(df['generated'].value_counts())

# Pull the essay text and its `generated` label out as two parallel Python lists
texts, labels = (df[column].tolist() for column in ('text', 'generated'))

print(f"\nTotal samples: {len(texts)}")
print(f"Sample text length: {len(texts[0])} characters")

Loading dataset...
Dataset shape: (23274, 2)
Class distribution:
generated
1    11637
0    11637
Name: count, dtype: int64

Total samples: 23274
Sample text length: 4091 characters
Dataset shape: (23274, 2)
Class distribution:
generated
1    11637
0    11637
Name: count, dtype: int64

Total samples: 23274
Sample text length: 4091 characters


In [3]:
# Two-stage stratified split: hold out 30% of the samples, then halve that
# hold-out into validation and test sets (same seed for both stages)
X_train, X_temp, y_train, y_temp = train_test_split(
    texts, labels,
    stratify=labels,
    test_size=0.3,
    random_state=42,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp,
    stratify=y_temp,
    test_size=0.5,
    random_state=42,
)

# Split sizes
print(f"Training samples: {len(X_train)}")
print(f"Validation samples: {len(X_val)}")
print(f"Test samples: {len(X_test)}")

# Per-class counts (label 0 = Human, label 1 = AI)
print(f"\nTraining set distribution:")
print(f"Human: {y_train.count(0)}, AI: {y_train.count(1)}")
print(f"Validation set distribution:")
print(f"Human: {y_val.count(0)}, AI: {y_val.count(1)}")

Training samples: 16291
Validation samples: 3491
Test samples: 3492

Training set distribution:
Human: 8145, AI: 8146
Validation set distribution:
Human: 1746, AI: 1745


In [4]:
# Checkpoint to fine-tune and the token length used when tokenizing each essay
MODEL_NAME = 'roberta-base'
MAX_LENGTH = 512  # RoBERTa's maximum sequence length

print(f"Loading {MODEL_NAME} tokenizer and model...")
tokenizer = RobertaTokenizer.from_pretrained(MODEL_NAME)

# Two-label sequence classifier; attention maps and hidden states are not returned
model = RobertaForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=2, output_attentions=False, output_hidden_states=False
)

print(f"Model loaded successfully!")
print(f"Model parameters: {model.num_parameters():,}")

Loading roberta-base tokenizer and model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded successfully!
Model parameters: 124,647,170


In [5]:
# Create custom dataset class
class AIDetectionDataset(Dataset):
    """Map-style dataset that tokenizes one text sample per lookup.

    Each item is a dict with ``input_ids``, ``attention_mask`` and ``labels``
    tensors, i.e. the keyword names a sequence-classification forward pass
    is called with elsewhere in this notebook.

    Args:
        texts: Indexable, sized collection of raw texts. Every entry is
            coerced with ``str()`` before tokenization.
        labels: Indexable, sized collection of integer class labels,
            parallel to ``texts``.
        tokenizer: Callable invoked as ``tokenizer(text, truncation=True,
            padding='max_length', max_length=..., return_tensors='pt')``.
        max_length: Token length passed to the tokenizer for truncation
            and padding.

    Raises:
        ValueError: If ``texts`` and ``labels`` have different lengths.
    """

    def __init__(self, texts, labels, tokenizer, max_length):
        # Fail fast: a mismatch would otherwise surface as an IndexError deep
        # inside training, or silently ignore surplus labels.
        if len(texts) != len(labels):
            raise ValueError(
                f"texts and labels must have the same length "
                f"(got {len(texts)} texts and {len(labels)} labels)"
            )
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of samples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize sample ``idx`` and return its tensors together with its label."""
        text = str(self.texts[idx])
        label = self.labels[idx]

        # Tokenize text (done lazily, once per lookup)
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )

        # The tokenizer output carries a leading batch axis of size 1;
        # flatten it away so each item is a 1-D tensor.
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# Wrap each split in an AIDetectionDataset sharing the tokenizer and MAX_LENGTH
print("Creating datasets...")
train_dataset, val_dataset, test_dataset = (
    AIDetectionDataset(split_texts, split_labels, tokenizer, MAX_LENGTH)
    for split_texts, split_labels in (
        (X_train, y_train),
        (X_val, y_val),
        (X_test, y_test),
    )
)

print("Datasets created successfully!")

Creating datasets...
Datasets created successfully!


In [6]:
# Define metrics function
def compute_metrics(eval_pred: EvalPrediction):
    """Compute accuracy and weighted precision/recall/F1 for an evaluation pass.

    Args:
        eval_pred: Pair of ``(predictions, labels)``. ``predictions`` holds
            per-class scores with classes along axis 1, or a tuple whose
            first element is that array.

    Returns:
        dict with the keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    predictions, labels = eval_pred
    # Predictions may arrive as a tuple of arrays; by convention the class
    # scores come first, so use those.
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    predictions = np.argmax(predictions, axis=1)

    # zero_division=0 keeps the same numeric result as the default for a class
    # that is never predicted, without raising the undefined-metric warning.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average='weighted', zero_division=0
    )
    accuracy = accuracy_score(labels, predictions)

    return {
        'accuracy': accuracy,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

print("Metrics function defined.")

Metrics function defined.


In [8]:
# Set up training arguments, grouped by concern
training_args = TrainingArguments(
    # Output locations
    output_dir='../models/roberta_results',
    logging_dir='../models/roberta_logs',
    push_to_hub=False,
    # Schedule and regularisation
    num_train_epochs=3,
    warmup_steps=500,
    weight_decay=0.01,
    # Batching, data loading and precision
    per_device_train_batch_size=8,  # Adjust based on GPU memory
    per_device_eval_batch_size=16,
    dataloader_num_workers=0,  # Set to 0 for Windows compatibility
    fp16=torch.cuda.is_available(),  # Use mixed precision if GPU available
    # Logging, evaluation and checkpoint cadence (in steps)
    logging_steps=100,
    eval_strategy="steps",  # Changed from evaluation_strategy
    eval_steps=500,
    save_steps=1000,
    save_total_limit=2,
    # Best-checkpoint selection: higher accuracy wins
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
)

print("Training arguments configured.")

Training arguments configured.


In [None]:
# Wire the model, the train/validation datasets and the metric callback into a Trainer
trainer = Trainer(
    model=model, args=training_args,
    train_dataset=train_dataset, eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

print("Trainer initialized. Starting training...")
print("This may take 30-60 minutes depending on your hardware.")

# Run fine-tuning; the result object exposes the training loss reported below
training_result = trainer.train()

print("Training completed!")
print(f"Training loss: {training_result.training_loss:.4f}")

Trainer initialized. Starting training...
This may take 30-60 minutes depending on your hardware.


Step,Training Loss,Validation Loss


In [None]:
# Score the test split with the trained model
print("Evaluating on test set...")
test_results = trainer.evaluate(test_dataset)

print("\nTest Results:")
for key, value in test_results.items():
    # Only entries carrying the 'eval_' prefix are reported
    if not key.startswith('eval_'):
        continue
    metric_name = key.replace('eval_', '').title()
    print(f"{metric_name}: {value:.4f}")

In [None]:
# Run the test split through the model and keep the raw prediction output
predictions = trainer.predict(test_dataset)
y_true = predictions.label_ids
y_pred = np.argmax(predictions.predictions, axis=1)  # highest-scoring class per sample

# Per-class precision / recall / F1
print("\nDetailed Classification Report:")
print(
    classification_report(
        y_true,
        y_pred,
        digits=4,
        target_names=['Human', 'AI Generated'],
    )
)

# Raw confusion counts
cm = confusion_matrix(y_true, y_pred)
print(f"\nConfusion Matrix:")
print(cm)

In [None]:
# Draw the confusion matrix as an annotated heatmap
plt.figure(figsize=(8, 6))
ax = sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=['Human', 'AI Generated'],
    yticklabels=['Human', 'AI Generated'],
)
ax.set_title('RoBERTa Model - Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.tight_layout()
plt.show()

# Unpack the 2x2 matrix row by row: first row is actual Human, second is actual AI Generated
(tn, fp), (fn, tp) = cm
sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)
precision = tp / (tp + fp)
f1 = 2 * (precision * sensitivity) / (precision + sensitivity)

print(f"\nAdditional Metrics:")
print(f"Sensitivity (Recall): {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")
print(f"Precision: {precision:.4f}")
print(f"F1-Score: {f1:.4f}")

In [None]:
# Save the trained model together with its tokenizer so the directory can be reloaded as a unit
model_save_path = '../models/roberta_ai_detector'
trainer.save_model(model_save_path)
tokenizer.save_pretrained(model_save_path)

print(f"Model saved to: {model_save_path}")

# Collect the test-set metrics and confusion matrix; values are converted to
# plain Python floats / lists so they are JSON-serialisable
results_dict = {
    'model_name': 'RoBERTa-base',
    'test_accuracy': float(test_results['eval_accuracy']),
    'test_f1': float(test_results['eval_f1']),
    'test_precision': float(test_results['eval_precision']),
    'test_recall': float(test_results['eval_recall']),
    'confusion_matrix': cm.tolist(),
    'training_samples': len(X_train),
    'test_samples': len(X_test)
}

# Single definition of the results path, used for both writing and reporting
results_path = '../models/roberta_results.json'
with open(results_path, 'w', encoding='utf-8') as f:
    json.dump(results_dict, f, indent=2)

print(f"Results saved to '{results_path}'")

In [None]:
# Test with sample predictions
def predict_text(text, model, tokenizer, device, max_length=512):
    """Classify one text and return ``(label, probabilities)``.

    ``label`` is the arg-max class index as a Python int (callers in this
    notebook treat 1 as AI-generated and anything else as human-written);
    ``probabilities`` is the softmax over the logits of the first (only)
    sample, as a NumPy array.
    """
    # Switch the model to evaluation mode before inference
    model.eval()

    # Tokenize to a fixed-length batch of one
    encoded = tokenizer(
        text,
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_tensors='pt',
    )

    # Move the two tensors the model consumes onto the target device
    model_inputs = {
        name: encoded[name].to(device)
        for name in ('input_ids', 'attention_mask')
    }

    # Forward pass without gradient tracking
    with torch.no_grad():
        logits = model(**model_inputs).logits
        probabilities = torch.softmax(logits, dim=-1)
        prediction = probabilities.argmax(dim=-1)

    return prediction.item(), probabilities[0].cpu().numpy()

# Test with sample texts
model.to(device)


def _report_sample_prediction(title, sample_text):
    """Run ``predict_text`` on ``sample_text`` and print the result under ``title``.

    Uses the notebook-level ``model``, ``tokenizer`` and ``device``. Prints the
    predicted class, the probability of that class, and both class
    probabilities (index 0 = Human, index 1 = AI).
    """
    pred, probs = predict_text(sample_text, model, tokenizer, device)
    print(title)
    print(f"Predicted: {'AI Generated' if pred == 1 else 'Human'}")
    print(f"Confidence: {probs[pred]:.4f}")
    print(f"Probabilities: Human={probs[0]:.4f}, AI={probs[1]:.4f}")


# Sample AI text (from your dataset): first 500 characters of the first AI-labelled row
ai_sample = df[df['generated'] == 1]['text'].iloc[0][:500]
_report_sample_prediction("Sample AI Text Prediction:", ai_sample)

print("\n" + "="*50 + "\n")

# Sample human text: first 500 characters of the first human-labelled row
human_sample = df[df['generated'] == 0]['text'].iloc[0][:500]
_report_sample_prediction("Sample Human Text Prediction:", human_sample)

print("\nRoBERTa model training completed successfully!")