## 1. Setup and Installation

In [None]:
# Upgrade pip itself before installing any packages.
%pip install --upgrade pip
# Install/upgrade NumPy on its own first; --only-binary :all: tells pip to use prebuilt wheels only.
%pip install numpy --upgrade --only-binary :all:
# Remaining dependencies: ART (attacks/defences), PyTorch + torchvision, and imaging/plotting libraries.
%pip install adversarial-robustness-toolbox torch torchvision pillow matplotlib opencv-python --only-binary :all:

## 2. Import Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset

from art.attacks.evasion import ProjectedGradientDescent
from art.estimators.classification import PyTorchClassifier
from art.defences.trainer import AdversarialTrainer

# Report what the notebook environment provides
print("Libraries imported successfully!")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

# Pick the compute device: use the GPU whenever CUDA is usable, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# The epoch budget follows the hardware: 20 epochs on GPU, 10 on CPU
if device.type == "cuda":
    EPOCHS = 20
    print("GPU detected: Using 20 epochs for thorough training")
else:
    EPOCHS = 10
    print("CPU detected: Using 10 epochs (training will be slower)")

## 3. Load and Prepare Dataset

We'll use the STL-10 dataset, which has higher-resolution (96x96) images and includes classes such as cars, trucks, and airplanes.

In [None]:
# The only preprocessing step is conversion to tensors (no resizing, no normalization)
transform = transforms.Compose([transforms.ToTensor()])

# Fetch both STL-10 splits into ./data (96x96 images, versus 32x32 for CIFAR-10)
print("Downloading STL-10 dataset...")
train_dataset = torchvision.datasets.STL10(
    root='./data',
    split='train',
    transform=transform,
    download=True,
)
test_dataset = torchvision.datasets.STL10(
    root='./data',
    split='test',
    transform=transform,
    download=True,
)

# Training uses the complete train split (5000 images), reshuffled every epoch;
# the batch size is kept small because the images are comparatively large.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)

# Evaluation uses only the first 1000 of the 8000 test images, in a fixed order
test_subset = Subset(test_dataset, range(1000))
test_loader = DataLoader(test_subset, shuffle=False, batch_size=32)

# Class index -> readable STL-10 class name
class_names = [
    'airplane', 'bird', 'car', 'cat', 'deer',
    'dog', 'horse', 'monkey', 'ship', 'truck',
]

print(f"Training samples: {len(train_dataset)}")
print(f"Test samples: {len(test_subset)}")
print(f"Image size: 96x96 (3x sharper than CIFAR-10)")
print(f"Classes: {class_names}")

### Visualize Sample Images

In [None]:
# Display some sample images
def show_images(images, labels, title="Sample Images", predictions=None):
    """Plot up to ten images in a 2x5 grid, titled with their class names.

    Args:
        images: Batch of images where ``images[i]`` is laid out as
            (channels, height, width). May be a torch tensor on any device
            (with or without gradient tracking) or a NumPy array.
        labels: Per-image ground-truth class indices into ``class_names``
            (tensor, array, or list of ints).
        title: Super-title drawn above the grid.
        predictions: Optional per-image predicted class indices. When given,
            each subplot shows both the true and predicted class, coloured
            green for a correct prediction and red for a wrong one.
    """
    # Normalise the batch to a detached CPU tensor so that .numpy() below
    # works for CUDA tensors, grad-tracking tensors and NumPy input alike.
    images = torch.as_tensor(images).detach().cpu()

    # Ensure we don't try to display more images than available
    num_images = min(10, len(images))
    fig, axes = plt.subplots(2, 5, figsize=(12, 5))
    axes = axes.ravel()

    for i in range(num_images):
        # imshow expects (height, width, channels)
        img = images[i].permute(1, 2, 0).numpy()
        axes[i].imshow(img)

        # Plain ints keep list indexing and the equality check independent
        # of whether labels/predictions are tensors, arrays or lists.
        true_idx = int(labels[i])
        if predictions is not None:
            pred_idx = int(predictions[i])
            pred_label = class_names[pred_idx]
            true_label = class_names[true_idx]
            color = 'green' if pred_idx == true_idx else 'red'
            axes[i].set_title(f"True: {true_label}\nPred: {pred_label}", color=color, fontsize=9)
        else:
            axes[i].set_title(class_names[true_idx], fontsize=10)
        axes[i].axis('off')

    # Hide unused subplots if we have fewer than 10 images
    for i in range(num_images, 10):
        axes[i].axis('off')

    plt.suptitle(title, fontsize=14, fontweight='bold')
    plt.tight_layout()
    plt.show()

# Pull the first batch from the (unshuffled) test loader and preview it
dataiter = iter(test_loader)
images, labels = next(dataiter)
show_images(images, labels, title="STL-10 Sample Images (96x96)")

## 4. Define a Simple CNN Model

In [None]:
class SimpleCNN(nn.Module):
    """Small four-block CNN classifier for 3x96x96 images and 10 classes.

    Each block is a 3x3 convolution (padding 1), ReLU and 2x2 max-pooling,
    so the spatial size halves four times: 96 -> 48 -> 24 -> 12 -> 6.
    The resulting 256x6x6 feature map feeds a 512-unit hidden layer with
    dropout, followed by a 10-way linear output that returns raw logits.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, 3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv3 = nn.Conv2d(128, 128, 3, padding=1)
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # After 4 pooling layers: 96 -> 48 -> 24 -> 12 -> 6
        self.fc1 = nn.Linear(256 * 6 * 6, 512)
        self.fc2 = nn.Linear(512, 10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Return class logits of shape (batch, 10) for a (batch, 3, 96, 96) input.

        Raises:
            RuntimeError: If the spatial size is not 96x96, because the
                flattened features then no longer match ``fc1``.
        """
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        x = self.pool(self.relu(self.conv4(x)))
        # Flatten everything except the batch axis. Unlike view(-1, 256*6*6),
        # this can never silently fold a wrong-sized input into a different
        # batch size; a mismatch surfaces as a shape error in fc1 instead.
        x = torch.flatten(x, 1)
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.fc2(x)
        return x

print("CNN model defined!")

## 5. Train Standard Model (Without Adversarial Training)

First, we'll train a standard model to establish a baseline.

In [None]:
def train_model(model, train_loader, epochs=5, lr=0.001):
    """Train a PyTorch classifier with Adam and cross-entropy loss.

    Args:
        model: ``nn.Module`` that maps a batch of inputs to per-class logits.
        train_loader: Iterable of ``(inputs, labels)`` batches that supports
            ``len()`` (used for progress and average-loss reporting).
        epochs: Number of full passes over ``train_loader``.
        lr: Learning rate passed to Adam.

    Returns:
        The same ``model`` instance, trained in place and left in training mode.

    Raises:
        ValueError: If an epoch yields no samples, since the epoch metrics
            would otherwise require a division by zero.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    # Move batches to wherever the model's weights actually live rather than
    # depending on a notebook-level `device` variable being defined and in sync.
    model_device = next(model.parameters()).device

    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0

        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(model_device), labels.to(model_device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # Predicted class = index of the largest logit; detached so the
            # bookkeeping stays outside the autograd graph.
            predicted = outputs.detach().argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            if (i + 1) % 20 == 0:
                print(f"  Batch [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}")

        if total == 0:
            raise ValueError("train_loader yielded no samples; cannot compute epoch metrics")

        epoch_acc = 100 * correct / total
        epoch_loss = running_loss / len(train_loader)
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%\n")

    return model

# Baseline: a freshly initialised SimpleCNN fitted on clean data only
print(f"Training standard model (no adversarial training) for {EPOCHS} epochs...\n")
standard_model = train_model(SimpleCNN().to(device), train_loader, epochs=EPOCHS)
print("Standard model training complete!")

## 6. Evaluate Standard Model on Clean and Adversarial Examples

In [None]:
def evaluate_model(model, test_loader, attack=None, attack_name="Clean"):
    """Measure top-1 accuracy of ``model``, optionally under an evasion attack.

    Args:
        model: ``nn.Module`` that maps a batch of inputs to per-class logits.
        test_loader: Iterable of ``(inputs, labels)`` batches of CPU tensors.
        attack: Optional object exposing ``generate(x=<numpy array>)`` that
            returns perturbed inputs as a NumPy array (e.g. an ART attack).
            When ``None``, the clean inputs are evaluated.
        attack_name: Label used in the printed accuracy line.

    Returns:
        Accuracy in percent as a Python ``float``.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    model.eval()
    # Evaluate on the device that holds the model's weights instead of
    # depending on a notebook-level `device` variable; fall back to the CPU
    # for parameter-free models.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    correct = 0
    total = 0

    for inputs, labels in test_loader:
        # The attack interface works on NumPy arrays
        inputs = inputs.numpy()
        labels_np = labels.numpy()

        # Generate adversarial examples if attack is provided
        if attack is not None:
            inputs = attack.generate(x=inputs)

        # Convert back to torch and evaluate without building a graph
        with torch.no_grad():
            outputs = model(torch.from_numpy(inputs).to(model_device))
            predicted = outputs.argmax(dim=1).cpu().numpy()

        total += len(labels_np)
        # int() keeps the running count (and the returned accuracy) a plain
        # Python number rather than a NumPy scalar.
        correct += int((predicted == labels_np).sum())

    if total == 0:
        raise ValueError("test_loader yielded no samples; cannot compute accuracy")

    accuracy = 100 * correct / total
    print(f"{attack_name} Accuracy: {accuracy:.2f}%")
    return accuracy

# Expose the trained baseline to ART through its PyTorch wrapper.
# An optimizer is supplied so the wrapper is fully configured, although this
# classifier is only used to generate attacks here (fit() is never called on it).
standard_classifier = PyTorchClassifier(
    model=standard_model,
    loss=nn.CrossEntropyLoss(),
    optimizer=optim.Adam(standard_model.parameters(), lr=0.001),
    nb_classes=10,
    input_shape=(3, 96, 96),
    clip_values=(0, 1)
)

# Untargeted PGD: perturbation budget eps=0.03, 10 iterations with step size 0.01
pgd_attack = ProjectedGradientDescent(
    estimator=standard_classifier,
    targeted=False,
    max_iter=10,
    eps_step=0.01,
    eps=0.03
)

# Baseline numbers: accuracy on untouched inputs, then on PGD-perturbed inputs
print("\n=== Standard Model Performance ===")
standard_clean_acc = evaluate_model(standard_model, test_loader, attack_name="Clean")
standard_adv_acc = evaluate_model(
    standard_model,
    test_loader,
    attack=pgd_attack,
    attack_name="Adversarial (PGD)",
)

### Visualize Standard Model Predictions

In [None]:
# Take the first batch of the unshuffled test loader for the figures
dataiter = iter(test_loader)
images, labels = next(dataiter)

standard_model.eval()

# Predictions on the untouched images
with torch.no_grad():
    outputs = standard_model(images.to(device))
    clean_preds = outputs.argmax(dim=1)

# Perturb the same batch with PGD, then classify the perturbed images
adv_images = pgd_attack.generate(x=images.numpy())
with torch.no_grad():
    outputs = standard_model(torch.from_numpy(adv_images).to(device))
    adv_preds = outputs.argmax(dim=1)

# One figure per condition; titles are green for correct and red for wrong predictions
print("Clean Images - Standard Model:")
show_images(
    images,
    labels,
    title="Standard Model: Clean Images",
    predictions=clean_preds.cpu().numpy(),
)

print("\nAdversarial Images - Standard Model:")
show_images(
    torch.from_numpy(adv_images),
    labels,
    title="Standard Model: Adversarial Images (PGD)",
    predictions=adv_preds.cpu().numpy(),
)

## 7. Adversarial Training

Now we'll train a model using adversarial training - training on both clean and adversarial examples.

In [None]:
# Create a new model for adversarial training
robust_model = SimpleCNN().to(device)

# Wrap with ART classifier (the optimizer is the one used by trainer.fit below)
robust_classifier = PyTorchClassifier(
    model=robust_model,
    loss=nn.CrossEntropyLoss(),
    optimizer=optim.Adam(robust_model.parameters(), lr=0.001),
    input_shape=(3, 96, 96),
    nb_classes=10,
    clip_values=(0, 1)
)

# PGD attack used to craft adversarial examples against the model being trained
pgd_attack_train = ProjectedGradientDescent(
    estimator=robust_classifier,
    eps=0.03,
    eps_step=0.01,
    max_iter=10,
    targeted=False
)

# Create adversarial trainer; ratio=0.5 replaces half of each batch with adversarial examples
trainer = AdversarialTrainer(robust_classifier, attacks=pgd_attack_train, ratio=0.5)

print("Starting adversarial training...\n")
print("This trains the model on a mix of clean (50%) and adversarial (50%) examples.\n")

# Prepare data for ART: collect the whole training set into NumPy arrays.
# The loop variables are deliberately NOT called `images`/`labels`: those
# notebook-level names hold the test batch used by the visualization cells,
# and rebinding them here would make later plots show training images.
X_train = []
y_train = []
for batch_images, batch_labels in train_loader:
    X_train.append(batch_images.numpy())
    y_train.append(batch_labels.numpy())

X_train = np.concatenate(X_train, axis=0)
y_train = np.concatenate(y_train, axis=0)

# Perform adversarial training
print(f"Training for {EPOCHS} epochs...")
trainer.fit(X_train, y_train, nb_epochs=EPOCHS, batch_size=32)

print("\nAdversarial training complete!")

## 8. Evaluate Robust Model

In [None]:
# Attack the robust model itself: this PGD instance is bound to robust_classifier
pgd_attack_robust = ProjectedGradientDescent(
    estimator=robust_classifier,
    targeted=False,
    max_iter=10,
    eps_step=0.01,
    eps=0.03
)

# Same two measurements as for the baseline: untouched inputs, then PGD-perturbed inputs
print("\n=== Robust Model Performance (After Adversarial Training) ===")
robust_clean_acc = evaluate_model(robust_model, test_loader, attack_name="Clean")
robust_adv_acc = evaluate_model(
    robust_model,
    test_loader,
    attack=pgd_attack_robust,
    attack_name="Adversarial (PGD)",
)

### Visualize Robust Model Predictions

In [None]:
# Use the same test batch that was shown for the standard model.
# It is fetched explicitly (test_loader is unshuffled, so its first batch is
# always the same) rather than relying on `images`/`labels` left over from
# earlier cells, which may have been rebound in between.
images, labels = next(iter(test_loader))

# Clean predictions
robust_model.eval()
with torch.no_grad():
    outputs = robust_model(images.to(device))
    _, clean_preds_robust = torch.max(outputs, 1)

# Adversarial predictions
adv_images_robust = pgd_attack_robust.generate(x=images.numpy())
with torch.no_grad():
    outputs = robust_model(torch.from_numpy(adv_images_robust).to(device))
    _, adv_preds_robust = torch.max(outputs, 1)

# Display results
print("Clean Images - Robust Model:")
show_images(images, labels, "Robust Model: Clean Images", clean_preds_robust.cpu().numpy())

print("\nAdversarial Images - Robust Model:")
show_images(torch.from_numpy(adv_images_robust), labels, "Robust Model: Adversarial Images (PGD)", adv_preds_robust.cpu().numpy())

## 9. Compare Results

In [None]:
# Grouped bar chart: clean vs. adversarial accuracy for each model
models = ['Standard Model', 'Robust Model']
clean_accuracies = [standard_clean_acc, robust_clean_acc]
adv_accuracies = [standard_adv_acc, robust_adv_acc]

width = 0.35
x = np.arange(len(models))

fig, ax = plt.subplots(figsize=(10, 6))
# Each group is centred on its tick: clean bar to the left, adversarial bar to the right
bars1 = ax.bar(x - width/2, clean_accuracies, width, color='skyblue', label='Clean Accuracy')
bars2 = ax.bar(x + width/2, adv_accuracies, width, color='salmon', label='Adversarial Accuracy')

ax.set_title('Model Performance Comparison', fontsize=14, fontweight='bold')
ax.set_ylabel('Accuracy (%)', fontsize=12, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(models, fontsize=11)
ax.grid(axis='y', alpha=0.3)
ax.legend(fontsize=11)

def add_value_labels(bars):
    """Write each bar's height (one decimal place, with %) centred just above the bar."""
    for rect in bars:
        value = rect.get_height()
        x_center = rect.get_x() + rect.get_width()/2.
        ax.text(x_center, value, f'{value:.1f}%',
                ha='center', va='bottom', fontsize=10, fontweight='bold')

add_value_labels(bars1)
add_value_labels(bars2)

plt.tight_layout()
plt.show()

# Print improvement: change in adversarial accuracy from standard to robust model.
# The "+" format flag prints the real sign, so a regression shows as "-x.xx%"
# instead of the malformed "+-x.xx%" a hard-coded plus sign would produce.
improvement = robust_adv_acc - standard_adv_acc
print(f"\n{'='*50}")
print(f"ADVERSARIAL ROBUSTNESS IMPROVEMENT: {improvement:+.2f}%")
print(f"{'='*50}")

# Per-model breakdown; the robustness gap is clean accuracy minus adversarial accuracy
print(f"\nStandard Model:")
print(f"  - Clean Accuracy: {standard_clean_acc:.2f}%")
print(f"  - Adversarial Accuracy: {standard_adv_acc:.2f}%")
print(f"  - Robustness Gap: {standard_clean_acc - standard_adv_acc:.2f}%")
print(f"\nRobust Model (Adversarial Training):")
print(f"  - Clean Accuracy: {robust_clean_acc:.2f}%")
print(f"  - Adversarial Accuracy: {robust_adv_acc:.2f}%")
print(f"  - Robustness Gap: {robust_clean_acc - robust_adv_acc:.2f}%")

## 10. Summary

### Key Takeaways:

1. **Standard Model Vulnerability**:
   - High accuracy on clean images
   - Significant drop in accuracy on adversarial examples
   - Vulnerable to PGD attacks

2. **Adversarial Training Defense**:
   - Trains on both clean and adversarial examples
   - Significantly improves robustness against attacks
   - May slightly reduce clean accuracy but greatly improves adversarial accuracy

3. **Trade-offs**:
   - Adversarial training takes longer (generating adversarial examples during training)
   - Small potential decrease in clean accuracy
   - Large improvement in adversarial robustness

### Automotive AI Applications:

- **Critical for Safety**: Autonomous vehicles must be robust against adversarial perturbations
- **Real-world Attacks**: Physical adversarial patches could fool perception systems
- **Defense Strategy**: Adversarial training is one of the most effective defenses
- **Continuous Improvement**: Models should be regularly updated with new attack patterns

### Next Steps:

- Experiment with different attack strengths (eps values)
- Try different attack methods (FGSM, C&W, etc.)
- Combine with other defense mechanisms
- Test on domain-specific automotive datasets