# Is reCAPTCHAv2 Safe? - TIMM Implementation

**Educational Research Project - UFABC Artificial Intelligence Course**

This notebook implements a deep learning classifier using **PyTorch Image Models (timm)** to analyze the viability of reCAPTCHAv2 as a CAPTCHA method.

## Model Options
You can experiment with different pre-trained models by changing `model_name` in CONFIG:
- `efficientnet_b0` - Efficient and fast (default)
- `resnet50` - Classic architecture
- `vit_small_patch16_224` - Vision Transformer
- `convnext_tiny` - Modern ConvNet
- `mobilenetv3_large_100` - Lightweight mobile model

## Features
- Transfer learning with timm pre-trained models
- Data augmentation for improved generalization
- Early stopping to prevent overfitting
- Learning rate scheduling (Cosine Annealing with Warm Restarts)
- MPS/CUDA/CPU support
- Training visualization and metrics

## Dataset Structure
```
dataset/
├── train/
│   ├── Bicycle/
│   ├── Bridge/
│   ├── Bus/
│   └── ...
└── val/
    ├── Bicycle/
    ├── Bridge/
    ├── Bus/
    └── ...
```

In [1]:
# Rename the Training/ and Validation/ folders to the train/ and val/ names
# that the dataset-loading code in this notebook reads from ../dataset.
!mv ../dataset/Training ../dataset/train
!mv ../dataset/Validation ../dataset/val

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import timm
from timm.data import create_transform
from timm.loss import LabelSmoothingCrossEntropy
import os
from pathlib import Path
import time
from tqdm import tqdm
import matplotlib.pyplot as plt
import pandas as pd

# Configuration
# Single source of truth for the experiment: model choice, input size,
# optimisation hyper-parameters, compute device and filesystem locations.
CONFIG = dict(
    model_name='efficientnet_b0',  # Can try: 'resnet50', 'vit_small_patch16_224', 'convnext_tiny'
    img_size=224,
    batch_size=32,
    epochs=100,
    patience=50,
    lr=0.001,
    # Device preference order: Apple MPS, then CUDA, then plain CPU.
    device=(
        'mps' if torch.backends.mps.is_available()
        else 'cuda' if torch.cuda.is_available()
        else 'cpu'
    ),
    num_workers=4,
    data_dir='../dataset',
    save_dir='is_recaptchav2_safe/timm_experiment',
)

print(f"Using device: {CONFIG['device']}")
print(f"Model: {CONFIG['model_name']}")

# Make sure the output directory (and any missing parents) exists before
# anything tries to write checkpoints, plots or CSV files into it.
Path(CONFIG['save_dir']).mkdir(parents=True, exist_ok=True)

In [None]:
# Data Transforms
# Both pipelines resize to a square CONFIG['img_size'] input and normalise
# with the same per-channel mean/std; only the training pipeline adds random
# augmentation (flip, rotation, colour jitter).
train_transform = transforms.Compose([
    transforms.Resize((CONFIG['img_size'], CONFIG['img_size'])),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((CONFIG['img_size'], CONFIG['img_size'])),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load datasets
train_dataset = datasets.ImageFolder(
    root=os.path.join(CONFIG['data_dir'], 'train'),
    transform=train_transform
)

val_dataset = datasets.ImageFolder(
    root=os.path.join(CONFIG['data_dir'], 'val'),
    transform=val_transform
)

# Each ImageFolder derives its label indices from its own class sub-folders.
# If the two splits do not expose the same class list, index i would mean a
# different class in train and val and every validation metric would be
# silently wrong, so fail loudly instead.
if val_dataset.classes != train_dataset.classes:
    raise ValueError(
        "Train and validation class folders differ: "
        f"train={train_dataset.classes}, val={val_dataset.classes}"
    )

# Pinned host memory only helps host-to-GPU copies on CUDA; requesting it on
# MPS or CPU has no benefit and makes the DataLoader emit a warning.
_use_pinned_memory = CONFIG['device'] == 'cuda'

train_loader = DataLoader(
    train_dataset,
    batch_size=CONFIG['batch_size'],
    shuffle=True,  # reshuffle the training samples every epoch
    num_workers=CONFIG['num_workers'],
    pin_memory=_use_pinned_memory
)

val_loader = DataLoader(
    val_dataset,
    batch_size=CONFIG['batch_size'],
    shuffle=False,  # fixed order keeps validation runs comparable
    num_workers=CONFIG['num_workers'],
    pin_memory=_use_pinned_memory
)

num_classes = len(train_dataset.classes)
class_names = train_dataset.classes

print(f"\nDataset Statistics:")
print(f"Number of classes: {num_classes}")
print(f"Classes: {class_names}")
print(f"Training samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")

In [None]:
# Build the pre-trained timm backbone with a classification head sized to the
# dataset's class count, and move it to the configured device in one go.
model = timm.create_model(
    CONFIG['model_name'], pretrained=True, num_classes=num_classes
).to(CONFIG['device'])

# Optimisation set-up: label-smoothed cross entropy, AdamW with weight decay.
criterion = LabelSmoothingCrossEntropy(smoothing=0.1)
optimizer = optim.AdamW(model.parameters(), lr=CONFIG['lr'], weight_decay=0.01)

# Cosine annealing with warm restarts, bounded below by eta_min.
scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
    optimizer, T_0=10, T_mult=2, eta_min=1e-6
)

# Report the model size: every parameter, and the subset that receives gradients.
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"\nModel created: {CONFIG['model_name']}")
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

In [None]:
# Training and validation functions
def train_epoch(model, train_loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        optimizer: Optimizer whose gradients are reset and stepped per batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_batch_loss, accuracy_percent)``: the average of the
        per-batch loss values and the percentage of correctly classified
        samples over the epoch.

    Raises:
        ValueError: If the loader yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0

    pbar = tqdm(train_loader, desc='Training')
    for num_batches, (inputs, labels) in enumerate(pbar, start=1):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

        # Average over the batches processed so far (not the total batch
        # count), so the displayed loss is meaningful throughout the epoch.
        pbar.set_postfix({
            'loss': f'{running_loss/num_batches:.4f}',
            'acc': f'{100.*correct/total:.2f}%'
        })

    if total == 0:
        raise ValueError("train_loader yielded no samples; cannot compute epoch metrics")

    return running_loss / num_batches, 100. * correct / total

def validate(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader`` without updating any weights.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        val_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_batch_loss, accuracy_percent)``: the average of the
        per-batch loss values and the percentage of correctly classified
        samples.

    Raises:
        ValueError: If the loader yields no samples.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        pbar = tqdm(val_loader, desc='Validation')
        for num_batches, (inputs, labels) in enumerate(pbar, start=1):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            # Average over the batches processed so far (not the total batch
            # count), so the displayed loss is meaningful throughout the pass.
            pbar.set_postfix({
                'loss': f'{running_loss/num_batches:.4f}',
                'acc': f'{100.*correct/total:.2f}%'
            })

    if total == 0:
        raise ValueError("val_loader yielded no samples; cannot compute validation metrics")

    return running_loss / num_batches, 100. * correct / total

In [None]:
# Training loop with early stopping
# Per-epoch metrics; each list gains exactly one entry per completed epoch.
history = {
    'train_loss': [],
    'train_acc': [],
    'val_loss': [],
    'val_acc': [],
    'lr': []
}

best_val_acc = 0.0
patience_counter = 0  # consecutive epochs without a validation-accuracy improvement
best_model_path = os.path.join(CONFIG['save_dir'], 'best.pt')
last_model_path = os.path.join(CONFIG['save_dir'], 'last.pt')

print("\n" + "="*50)
print("Starting Training")
print("="*50)

for epoch in range(CONFIG['epochs']):
    print(f"\nEpoch {epoch+1}/{CONFIG['epochs']}")
    print("-" * 50)

    # Capture the learning rate BEFORE stepping the scheduler so that the
    # value logged for this epoch is the one the optimizer actually used.
    current_lr = optimizer.param_groups[0]['lr']

    # Train
    train_loss, train_acc = train_epoch(
        model, train_loader, criterion, optimizer, CONFIG['device']
    )

    # Validate
    val_loss, val_acc = validate(
        model, val_loader, criterion, CONFIG['device']
    )

    # Update learning rate for the next epoch
    scheduler.step()

    # Save history
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)
    history['lr'].append(current_lr)

    print(f"\nEpoch Summary:")
    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
    print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")
    print(f"Learning Rate: {current_lr:.6f}")

    # Snapshot of this epoch, built once and written to both checkpoint files.
    # 'epoch' is the zero-based loop index.
    epoch_state = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'val_acc': val_acc,
        'val_loss': val_loss,
        'class_names': class_names
    }

    # Save best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(epoch_state, best_model_path)
        print(f"✓ New best model saved! Val Acc: {val_acc:.2f}%")
        patience_counter = 0
    else:
        patience_counter += 1
        print(f"Patience: {patience_counter}/{CONFIG['patience']}")

    # Save last model (overwritten every epoch)
    torch.save(epoch_state, last_model_path)

    # Early stopping
    if patience_counter >= CONFIG['patience']:
        print(f"\nEarly stopping triggered after {epoch+1} epochs")
        break

print("\n" + "="*50)
print("Training Completed!")
print("="*50)
print(f"Best validation accuracy: {best_val_acc:.2f}%")
print(f"Best model saved at: {best_model_path}")
print(f"Last model saved at: {last_model_path}")

In [None]:
# Persist the per-epoch metrics as a CSV file inside the experiment directory
history_csv_path = os.path.join(CONFIG['save_dir'], 'training_history.csv')
df = pd.DataFrame(history)
df.to_csv(history_csv_path, index=False)
print(f"\nTraining history saved to: {history_csv_path}")

In [None]:
# Plot training curves: loss, accuracy and learning rate side by side
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# The loss and accuracy panels share one layout (train curve, validation
# curve, axis labels, legend, grid), so they are drawn by the same loop.
panel_specs = (
    (axes[0], 'train_loss', 'val_loss', 'Train Loss', 'Val Loss',
     'Loss', 'Training and Validation Loss'),
    (axes[1], 'train_acc', 'val_acc', 'Train Acc', 'Val Acc',
     'Accuracy (%)', 'Training and Validation Accuracy'),
)
for panel, train_key, val_key, train_label, val_label, y_label, panel_title in panel_specs:
    panel.plot(history[train_key], label=train_label, marker='o')
    panel.plot(history[val_key], label=val_label, marker='s')
    panel.set_xlabel('Epoch')
    panel.set_ylabel(y_label)
    panel.set_title(panel_title)
    panel.legend()
    panel.grid(True)

# Learning-rate panel, drawn on a logarithmic y-axis
lr_panel = axes[2]
lr_panel.plot(history['lr'], label='Learning Rate', marker='o', color='green')
lr_panel.set_xlabel('Epoch')
lr_panel.set_ylabel('Learning Rate')
lr_panel.set_title('Learning Rate Schedule')
lr_panel.set_yscale('log')
lr_panel.legend()
lr_panel.grid(True)

plt.tight_layout()

# Write the figure to disk before showing it
plots_path = os.path.join(CONFIG['save_dir'], 'training_plots.png')
plt.savefig(plots_path, dpi=300, bbox_inches='tight')
plt.show()

print(f"Training plots saved to: {plots_path}")

## Model Evaluation and Inference

Now you can use the trained model to make predictions on new reCAPTCHA images.

In [None]:
# Load best model for inference
# The checkpoint is the dict written by the training loop to best_model_path;
# map_location places its tensors on the configured device.
checkpoint = torch.load(best_model_path, map_location=CONFIG['device'])
# Restore the best-scoring weights into the existing model object, then
# switch it to evaluation mode for inference.
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# The stored 'epoch' is the zero-based loop index, hence the +1 for display.
print(f"Loaded best model from epoch {checkpoint['epoch']+1}")
print(f"Best validation accuracy: {checkpoint['val_acc']:.2f}%")
print(f"Classes: {checkpoint['class_names']}")

# Function to predict on a single image
def predict_image(model, image_path, transform, device, class_names):
    """Predict the class of a single image file.

    Args:
        model: Trained classifier; it is switched to evaluation mode.
        image_path: Path of the image file to classify.
        transform: Callable turning a PIL image into an input tensor
            (for example the validation transform).
        device: Device the input tensor is moved to.
        class_names: Sequence mapping a predicted class index to its name.

    Returns:
        Tuple ``(class_name, confidence)`` where ``confidence`` is the
        softmax probability of the predicted class as a float.
    """
    from PIL import Image

    # Make the function safe to call on a model that is still in training
    # mode, consistent with the other evaluation helpers in this notebook.
    model.eval()

    # The context manager closes the underlying file handle; convert()
    # produces an independent in-memory RGB image that outlives it.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert('RGB')

    # Add a leading batch dimension of size 1 before the forward pass.
    image_tensor = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        outputs = model(image_tensor)
        probabilities = torch.nn.functional.softmax(outputs, dim=1)
        confidence, predicted = probabilities.max(1)

    return class_names[predicted.item()], confidence.item()

# Example usage (uncomment to use):
# image_path = 'path/to/your/test/image.jpg'
# predicted_class, confidence = predict_image(model, image_path, val_transform, CONFIG['device'], class_names)
# print(f"Predicted: {predicted_class} (Confidence: {confidence*100:.2f}%)")

In [None]:
# Detailed evaluation on validation set with confusion matrix
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import numpy as np

def evaluate_model(model, val_loader, device, class_names):
    """Run a full evaluation pass and report detailed metrics.

    Prints a classification report, saves and shows a confusion-matrix
    heatmap (written to ``confusion_matrix.png`` in ``CONFIG['save_dir']``),
    and prints per-class and overall accuracy.

    Args:
        model: Trained classifier; it is switched to evaluation mode.
        val_loader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device the inputs are moved to before the forward pass.
        class_names: Class names ordered by label index.

    Returns:
        Tuple ``(cm, class_acc)``: the confusion matrix with one row/column
        per entry of ``class_names`` (rows are true labels), and the
        per-class accuracy in percent.
    """
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in tqdm(val_loader, desc='Evaluating'):
            inputs = inputs.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)

            # tolist() yields plain Python ints and works for tensors on any device.
            all_preds.extend(predicted.tolist())
            all_labels.extend(labels.tolist())

    # Pin the label set to every known class. Without it, a class that never
    # appears in the labels or predictions is dropped, which shrinks the
    # matrix and breaks the alignment with class_names below.
    label_ids = list(range(len(class_names)))

    # Classification report
    print("\nClassification Report:")
    print("="*80)
    print(classification_report(
        all_labels, all_preds, labels=label_ids, target_names=class_names
    ))

    # Confusion matrix
    cm = confusion_matrix(all_labels, all_preds, labels=label_ids)

    plt.figure(figsize=(12, 10))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.tight_layout()
    plt.savefig(os.path.join(CONFIG['save_dir'], 'confusion_matrix.png'), dpi=300, bbox_inches='tight')
    plt.show()

    # Per-class accuracy
    print("\nPer-Class Accuracy:")
    print("="*80)
    class_correct = np.diag(cm)
    class_total = cm.sum(axis=1)
    # A class with no validation samples has 0 correct out of 0; clamp the
    # denominator to 1 so it reports 0.00% (0/0) instead of dividing by zero.
    class_acc = class_correct / class_total.clip(min=1) * 100

    for i, class_name in enumerate(class_names):
        print(f"{class_name:20s}: {class_acc[i]:6.2f}% ({class_correct[i]:4d}/{class_total[i]:4d})")

    overall_acc = class_correct.sum() / cm.sum() * 100
    print(f"\n{'Overall Accuracy':20s}: {overall_acc:6.2f}%")

    return cm, class_acc

# Run evaluation
# Evaluates the current model on the validation loader and keeps the returned
# confusion matrix and per-class accuracies available for later cells.
cm, class_acc = evaluate_model(model, val_loader, CONFIG['device'], class_names)