# AI Text Detector - Model Comparison

This notebook compares the performance of RoBERTa and DistilBERT models for AI text detection.

## Models Compared
1. **RoBERTa-base**: High accuracy, slower inference
2. **DistilBERT-base**: Fast inference, smaller size

## Comparison Metrics
- Accuracy, Precision, Recall, F1-Score
- Training time and inference speed
- Model size and memory usage
- Confusion matrices and error analysis (ROC curves are listed as a goal but are not plotted in the cells below)

In [None]:
# Import Required Libraries
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.metrics import roc_curve, auc, confusion_matrix
import warnings
# Ignore every warning category for the rest of the kernel session, presumably
# to keep cell output uncluttered.
# NOTE(review): this also hides warnings that may matter (e.g. deprecations);
# consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

# Set style
# Start from matplotlib's default style, then make seaborn's "husl" palette the
# default color cycle for the matplotlib/seaborn figures below.
plt.style.use('default')
sns.set_palette("husl")

# Confirmation that the import/setup cell ran to completion.
print("Libraries imported successfully!")

In [None]:
# Load model results
def load_results(path, model_name):
    """Load one model's saved evaluation results from a JSON file.

    Args:
        path: Location of the results JSON (expected to be produced by the
            corresponding training notebook).
        model_name: Display name used in the status messages, e.g. "RoBERTa".

    Returns:
        The parsed JSON content, or None when the file is missing or does not
        contain valid JSON. A status message is printed in every case.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the platform default.
        with open(path, 'r', encoding='utf-8') as f:
            results = json.load(f)
    except FileNotFoundError:
        print(f"{model_name} results not found. Please run the {model_name} training notebook first.")
        return None
    except json.JSONDecodeError as exc:
        # A truncated or corrupt file is reported like a missing one, so the
        # guarded cells below skip cleanly instead of the notebook stopping on
        # a traceback.
        print(f"{model_name} results file '{path}' is not valid JSON ({exc}). "
              f"Please re-run the {model_name} training notebook.")
        return None

    print(f"{model_name} results loaded successfully!")
    return results


# Either variable is None when its results file could not be loaded; every
# later cell checks both before doing any work.
roberta_results = load_results('../models/roberta_results.json', 'RoBERTa')
distilbert_results = load_results('../models/distilbert_results.json', 'DistilBERT')

if roberta_results and distilbert_results:
    print("\nBoth model results loaded. Proceeding with comparison...")
else:
    print("\nPlease ensure both models are trained before running this comparison.")

In [None]:
# Create comparison dataframe
#
# Builds `comparison_data` (column name -> [RoBERTa value, DistilBERT value])
# and the `comparison_df` table derived from it. Both names are reused by the
# plotting and saving cells further down.
if not (roberta_results and distilbert_results):
    print("Cannot create comparison without both model results.")
else:
    result_pair = (roberta_results, distilbert_results)

    # Core metrics: looked up with [] so a results file lacking one fails loudly.
    required_fields = [
        ('Accuracy', 'test_accuracy'),
        ('F1-Score', 'test_f1'),
        ('Precision', 'test_precision'),
        ('Recall', 'test_recall'),
    ]
    # Extras: fall back to the 'N/A' placeholder when a results file omits them.
    optional_fields = [
        ('Training Time (min)', 'training_time_minutes'),
        ('Inference Time (ms)', 'inference_time_ms'),
        ('Model Parameters', 'model_parameters'),
    ]

    table_columns = {'Model': ['RoBERTa-base', 'DistilBERT-base']}
    for column_name, result_key in required_fields:
        table_columns[column_name] = [results[result_key] for results in result_pair]
    for column_name, result_key in optional_fields:
        table_columns[column_name] = [results.get(result_key, 'N/A') for results in result_pair]

    # Publish under the shared name only once the table is complete.
    comparison_data = table_columns

    comparison_df = pd.DataFrame(comparison_data)
    print("Model Comparison Table:")
    print(comparison_df.round(4))

In [None]:
# Visualize performance metrics
#
# 2x2 Plotly dashboard: (1,1) metric bars, (1,2) parameter counts,
# (2,1) inference time, (2,2) accuracy vs. inference time.
# Panels whose inputs are not numeric in both results dicts are left empty.
if not (roberta_results and distilbert_results):
    print("Cannot create visualizations without both model results.")
else:
    def both_numeric(key):
        """Return True when both results dicts hold an int/float under `key`."""
        return all(
            isinstance(results.get(key), (int, float))
            for results in (roberta_results, distilbert_results)
        )

    model_labels = ['RoBERTa', 'DistilBERT']
    model_colors = ['#FF6B6B', '#4ECDC4']

    metrics = ['Accuracy', 'F1-Score', 'Precision', 'Recall']
    # Row 0 of comparison_df is RoBERTa, row 1 is DistilBERT.
    roberta_scores = [comparison_df.loc[0, name] for name in metrics]
    distilbert_scores = [comparison_df.loc[1, name] for name in metrics]

    panel_titles = (
        'Performance Metrics Comparison', 'Model Size Comparison',
        'Speed Comparison', 'Accuracy vs Speed Trade-off',
    )
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=panel_titles,
        specs=[[{"type": "bar"}, {"type": "bar"}],
               [{"type": "bar"}, {"type": "scatter"}]]
    )

    # Performance metrics: one bar series per model in the top-left panel
    score_series = zip(model_labels, (roberta_scores, distilbert_scores), model_colors)
    for series_name, series_scores, series_color in score_series:
        fig.add_trace(
            go.Bar(x=metrics, y=series_scores, name=series_name, marker_color=series_color),
            row=1, col=1
        )

    # Model size comparison (parameter counts shown in millions)
    if both_numeric('model_parameters'):
        model_sizes = [
            results['model_parameters'] / 1e6
            for results in (roberta_results, distilbert_results)
        ]
        size_bars = go.Bar(x=model_labels, y=model_sizes,
                           marker_color=model_colors, showlegend=False)
        fig.add_trace(size_bars, row=1, col=2)

    # Speed comparison and accuracy-vs-speed trade-off share the same precondition
    if both_numeric('inference_time_ms'):
        inference_times = [
            roberta_results['inference_time_ms'],
            distilbert_results['inference_time_ms'],
        ]
        speed_bars = go.Bar(x=model_labels, y=inference_times,
                            marker_color=model_colors, showlegend=False)
        fig.add_trace(speed_bars, row=2, col=1)

        tradeoff_points = go.Scatter(
            x=inference_times,
            y=[roberta_results['test_accuracy'], distilbert_results['test_accuracy']],
            mode='markers+text',
            text=model_labels,
            textposition="top center",
            marker=dict(size=15, color=model_colors),
            showlegend=False
        )
        fig.add_trace(tradeoff_points, row=2, col=2)

    # Update layout: (row, col) -> (x-axis title, y-axis title)
    axis_titles = {
        (1, 1): ("Metrics", "Score"),
        (1, 2): ("Model", "Parameters (Millions)"),
        (2, 1): ("Model", "Inference Time (ms)"),
        (2, 2): ("Inference Time (ms)", "Accuracy"),
    }
    for (panel_row, panel_col), (x_title, y_title) in axis_titles.items():
        fig.update_xaxes(title_text=x_title, row=panel_row, col=panel_col)
        fig.update_yaxes(title_text=y_title, row=panel_row, col=panel_col)

    fig.update_layout(title_text="Model Performance Comparison", height=800)
    fig.show()

In [None]:
# Compare confusion matrices
#
# Draws the two saved confusion matrices side by side, then prints how many
# test samples each model misclassified.
if not (roberta_results and distilbert_results):
    print("Cannot create confusion matrix comparison without both model results.")
else:
    class_labels = ['Human', 'AI']
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # (model name, results dict, colormap) for the left and right panel
    panel_specs = [
        ('RoBERTa', roberta_results, 'Reds'),
        ('DistilBERT', distilbert_results, 'Greens'),
    ]
    matrices = {}
    for panel_ax, (model_name, model_results, color_map) in zip(axes, panel_specs):
        matrix = np.array(model_results['confusion_matrix'])
        sns.heatmap(matrix, annot=True, fmt='d', cmap=color_map,
                    xticklabels=class_labels, yticklabels=class_labels,
                    ax=panel_ax)
        panel_ax.set_title(f'{model_name} Confusion Matrix')
        panel_ax.set_xlabel('Predicted')
        panel_ax.set_ylabel('Actual')
        matrices[model_name] = matrix

    roberta_cm = matrices['RoBERTa']
    distilbert_cm = matrices['DistilBERT']

    plt.tight_layout()
    plt.show()

    # Calculate error analysis: off-diagonal cells are the misclassifications
    # (false positives at [0, 1], false negatives at [1, 0]).
    roberta_errors = roberta_cm[0, 1] + roberta_cm[1, 0]
    distilbert_errors = distilbert_cm[0, 1] + distilbert_cm[1, 0]
    error_gap = abs(roberta_errors - distilbert_errors)

    print(f"\nError Analysis:")
    print(f"RoBERTa total errors: {roberta_errors}")
    print(f"DistilBERT total errors: {distilbert_errors}")
    print(f"Difference: {error_gap} errors")

In [None]:
# Performance summary and recommendations
#
# Prints the accuracy and speed winners followed by usage recommendations.
# The recommendations are derived from the winners computed here, so the text
# cannot contradict the measured results.
# Defines `best_accuracy`, `accuracy_diff` and `speed_winner` for later cells.
if roberta_results and distilbert_results:
    print("="*80)
    print("MODEL COMPARISON SUMMARY")
    print("="*80)

    roberta_accuracy = roberta_results['test_accuracy']
    distilbert_accuracy = distilbert_results['test_accuracy']

    # Determine best performing model. An exact tie is credited to DistilBERT,
    # which the notebook introduction describes as the smaller model.
    if roberta_accuracy > distilbert_accuracy:
        best_accuracy = "RoBERTa"
    else:
        best_accuracy = "DistilBERT"
    accuracy_diff = abs(roberta_accuracy - distilbert_accuracy)

    print(f"\n🎯 ACCURACY WINNER: {best_accuracy}")
    print(f"   Accuracy difference: {accuracy_diff:.4f} ({accuracy_diff*100:.2f}%)")

    # Speed comparison (only possible when both results files recorded a numeric latency)
    roberta_ms = roberta_results.get('inference_time_ms')
    distilbert_ms = distilbert_results.get('inference_time_ms')
    speed_measured = all(isinstance(ms, (int, float)) for ms in (roberta_ms, distilbert_ms))

    if speed_measured:
        if roberta_ms < distilbert_ms:
            speed_winner = "RoBERTa"
        else:
            speed_winner = "DistilBERT"
        speed_diff = abs(roberta_ms - distilbert_ms)

        print(f"\n⚡ SPEED WINNER: {speed_winner}")
        print(f"   Speed advantage: {speed_diff:.1f}ms faster per prediction")
    else:
        # Always (re)bind speed_winner so a value left over from an earlier run
        # of this cell cannot leak into later cells. 'Unknown' is the same
        # placeholder the save cell uses when no speed winner exists.
        speed_winner = 'Unknown'
        print(f"\n⚡ SPEED WINNER: Unknown (inference time not recorded for both models)")

    print(f"\n📊 DETAILED METRICS:")
    print(f"   RoBERTa    - Accuracy: {roberta_results['test_accuracy']:.4f}, F1: {roberta_results['test_f1']:.4f}")
    print(f"   DistilBERT - Accuracy: {distilbert_results['test_accuracy']:.4f}, F1: {distilbert_results['test_f1']:.4f}")

    # Without a latency measurement, fall back to the notebook's stated
    # expectation that DistilBERT is the faster model, and say so in the output.
    if speed_measured:
        realtime_choice = speed_winner
        realtime_note = ""
    else:
        realtime_choice = "DistilBERT"
        realtime_note = " (expected; inference time not measured)"

    # If one model is at least as good on both axes there is no trade-off to
    # balance; otherwise the faster model is the balanced pick.
    if speed_measured and best_accuracy == speed_winner:
        balanced_choice = best_accuracy
        balanced_reason = "Matches or beats the other model on both accuracy and speed"
    else:
        balanced_choice = realtime_choice
        balanced_reason = "Good accuracy with much better speed"

    print(f"\n💡 RECOMMENDATIONS:")
    print(f"   📈 For HIGHEST ACCURACY: Use {best_accuracy}")
    print(f"      - Best for research, batch processing")
    print(f"      - When accuracy is more important than speed")

    print(f"   ⚡ For REAL-TIME APPLICATIONS: Use {realtime_choice}{realtime_note}")
    print(f"      - Best for web apps, mobile apps")
    print(f"      - When speed matters and accuracy is sufficient")

    print(f"   🎯 For BALANCED PERFORMANCE: {balanced_choice}")
    print(f"      - {balanced_reason}")
    print(f"      - Recommended for most production use cases")

    print("\n" + "="*80)
else:
    print("Cannot generate summary without both model results.")

In [None]:
# Save comparison results
#
# Writes the comparison as JSON (full summary) and CSV (table only) next to
# the per-model results files. Relies on `comparison_data`, `comparison_df`
# and `best_accuracy` having been created by the earlier cells.
if not (roberta_results and distilbert_results):
    print("Cannot save comparison without both model results.")
else:
    json_path = '../models/model_comparison.json'
    csv_path = '../models/model_comparison.csv'

    created_at = pd.Timestamp.now().isoformat()

    # Dataset sizes are read from the RoBERTa results only.
    # NOTE(review): presumably both models share the same split - confirm.
    n_train = roberta_results['training_samples']
    n_test = roberta_results['test_samples']
    dataset_info = {
        'total_samples': n_train + n_test,
        'training_samples': n_train,
        'test_samples': n_test
    }

    # speed_winner only exists if the summary cell could compare inference times.
    try:
        fastest_model = speed_winner
    except NameError:
        fastest_model = 'Unknown'

    recommendations = {
        'best_accuracy': best_accuracy,
        'best_speed': fastest_model,
        'recommended_for_production': 'DistilBERT',
        'recommended_for_research': 'RoBERTa'
    }

    comparison_summary = {
        'comparison_date': created_at,
        'models_compared': ['RoBERTa-base', 'DistilBERT-base'],
        'dataset_info': dataset_info,
        'performance_comparison': comparison_data,
        'recommendations': recommendations
    }

    with open(json_path, 'w') as summary_file:
        json.dump(comparison_summary, summary_file, indent=2)

    print("Comparison results saved to '../models/model_comparison.json'")

    # Also save as CSV for easy viewing
    comparison_df.to_csv(csv_path, index=False)
    print("Comparison table saved to '../models/model_comparison.csv'")

    print("\nModel comparison completed successfully!")
    print("Both models are ready for deployment in the Streamlit app.")