# CNN Architecture Design for Digital Pathology

## Learning Objectives
- Design CNN architectures optimized for histopathology images
- Understand the unique challenges of pathology image analysis
- Implement transfer learning with pre-trained models
- Build custom architectures for patch-based classification

## Prerequisites
- Basic deep learning knowledge
- Familiarity with CNNs and PyTorch/TensorFlow
- Understanding of image classification concepts

Let's build powerful CNNs for automated pathology diagnosis!

In [None]:
# Import required libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torchvision.models as models
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os
from tqdm import tqdm

# Pick the compute device: CUDA when a GPU is visible to PyTorch, else CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"🚀 Using device: {device}")
print("🧠 Ready to build CNNs for digital pathology!")

In [None]:
# Custom Pathology CNN Architecture
class PathologyNet(nn.Module):
    """Convolutional classifier for histopathology image patches.

    The feature extractor is four stages of
    (Conv3x3 -> BatchNorm -> ReLU) x 2 -> MaxPool2x2 -> Dropout2d, with
    channel widths 32/64/128/256 and spatial dropout 0.1/0.2/0.3/0.4.
    The feature map is adaptively average-pooled to 7x7, flattened, and fed
    to a 1024 -> 512 -> ``num_classes`` fully connected head.

    Args:
        num_classes: Number of output logits.
        dropout_rate: Dropout probability used in the fully connected head.
    """

    def __init__(self, num_classes=3, dropout_rate=0.5):
        super().__init__()

        # (input channels, output channels, spatial dropout) for each stage
        stage_specs = [
            (3, 32, 0.1),
            (32, 64, 0.2),
            (64, 128, 0.3),
            (128, 256, 0.4),
        ]

        # All stages live in one flat Sequential, built in order, so layer
        # indices run features.0 ... features.31
        stage_layers = []
        for in_ch, out_ch, drop_p in stage_specs:
            stage_layers += [
                nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2, 2),
                nn.Dropout2d(drop_p),
            ]
        self.features = nn.Sequential(*stage_layers)

        # Fixed 7x7 output grid lets the head accept any input resolution
        self.adaptive_pool = nn.AdaptiveAvgPool2d((7, 7))

        # Fully connected head
        head_inputs = 256 * 7 * 7
        self.classifier = nn.Sequential(
            nn.Linear(head_inputs, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for image batch ``x``."""
        pooled = self.adaptive_pool(self.features(x))
        flat = pooled.view(pooled.shape[0], -1)
        return self.classifier(flat)

# Instantiate the custom network and move it onto the selected device
model = PathologyNet(num_classes=3)
model = model.to(device)
print(f"✅ PathologyNet created with {sum(map(torch.numel, model.parameters()))} parameters")

In [None]:
# Transfer Learning with Pre-trained Models
class PathologyTransferNet(nn.Module):
    """Transfer-learning classifier: pre-trained ResNet-50 trunk plus a new head.

    The trunk is torchvision's ResNet-50 with its final ``fc`` layer removed
    (its own global average pool is kept, so the trunk emits a
    ``(batch, num_features, 1, 1)`` tensor). The head pools, flattens and
    maps the features through 512 -> 256 -> ``num_classes`` with dropout.

    Args:
        backbone: ``'resnet50'`` or ``'efficientnet'``. The latter is a
            placeholder that prints a warning and falls back to ResNet-50.
        num_classes: Number of output logits.
        freeze_backbone: If True, disable gradients for all trunk parameters
            so only the head is trained.

    Raises:
        ValueError: If ``backbone`` is not one of the supported names.
    """

    def __init__(self, backbone='resnet50', num_classes=3, freeze_backbone=True):
        super().__init__()

        if backbone not in ('resnet50', 'efficientnet'):
            raise ValueError(f"Unsupported backbone: {backbone}")
        if backbone == 'efficientnet':
            # For demonstration - would need efficientnet package
            print("⚠️ EfficientNet would require additional installation")

        # Both supported names currently resolve to a pre-trained ResNet-50.
        # NOTE(review): recent torchvision releases deprecate `pretrained=` in
        # favour of `weights=` - confirm against the installed version.
        resnet = models.resnet50(pretrained=True)
        num_features = resnet.fc.in_features
        # Drop the final classification layer, keep everything up to avgpool
        self.backbone = nn.Sequential(*list(resnet.children())[:-1])

        # Freeze backbone parameters if specified.
        # NOTE: this only turns off gradients; BatchNorm layers in the trunk
        # still update their running statistics while in train mode.
        if freeze_backbone:
            for param in self.backbone.parameters():
                param.requires_grad = False
            print("🔒 Backbone frozen for transfer learning")

        # Custom classifier for pathology. It pools and flattens on its own,
        # so it must be given the 4-D feature map from the backbone.
        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Dropout(0.5),
            nn.Linear(num_features, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.2),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for image batch ``x``."""
        features = self.backbone(x)
        # Do NOT flatten here: the classifier starts with AdaptiveAvgPool2d,
        # which rejects 2-D input. The head's own Flatten handles reshaping.
        return self.classifier(features)

# Build the ResNet-50 transfer model and count how many weights will train
transfer_model = PathologyTransferNet(backbone='resnet50', num_classes=3)
transfer_model = transfer_model.to(device)
total_params = sum(map(torch.numel, transfer_model.parameters()))
trainable_params = sum(
    weight.numel()
    for weight in transfer_model.parameters()
    if weight.requires_grad
)

print(f"✅ Transfer model created:")
print(f"   Total parameters: {total_params:,}")
print(f"   Trainable parameters: {trainable_params:,}")
print(f"   Frozen parameters: {total_params - trainable_params:,}")

In [None]:
# Synthetic pathology dataset for demonstration
class SyntheticPathologyDataset(Dataset):
    """Synthetic three-class image dataset standing in for pathology patches.

    Labels are drawn once at construction; images are generated on the fly
    from class-dependent Gaussian noise each time an item is requested, so
    repeated access to the same index yields different pixels.

    Args:
        num_samples: Number of items in the dataset.
        image_size: Height and width of the generated square RGB image.
        transform: Optional callable applied to the PIL image before return.
    """

    def __init__(self, num_samples=1000, image_size=224, transform=None):
        self.num_samples = num_samples
        self.image_size = image_size
        self.transform = transform

        # Fixed seed makes the label sequence reproducible. NOTE: this reseeds
        # NumPy's global RNG, and every instance draws the same label prefix.
        np.random.seed(42)
        self.labels = np.random.randint(0, 3, num_samples)  # 3 classes

        # Class names, indexed by label value
        self.class_names = ['Normal', 'Benign', 'Malignant']

    def __len__(self):
        return self.num_samples

    @staticmethod
    def _wave_pattern(size):
        """Return a (size, size, 1) grid of sin(0.1 * row) * cos(0.1 * col)."""
        coords = np.arange(size) * 0.1
        # Parenthesise the outer product BEFORE adding the channel axis;
        # otherwise the new axis attaches to the cos term only and the
        # result collapses to a pattern that varies along columns alone.
        return (np.sin(coords)[:, None] * np.cos(coords)[None, :])[..., None]

    def __getitem__(self, idx):
        """Generate and return ``(image, label)`` for item ``idx``."""
        label = self.labels[idx]
        shape = (self.image_size, self.image_size, 3)

        # Create class-specific patterns
        if label == 0:  # Normal
            # More regular, uniform texture
            image = np.random.normal(0.6, 0.1, shape)
        elif label == 1:  # Benign
            # Slightly more irregular, with some added texture
            image = np.random.normal(0.4, 0.15, shape)
            image += 0.1 * np.random.random(shape)
        else:  # Malignant
            # More chaotic, varied texture plus a 2-D wave pattern that is
            # broadcast across the three colour channels
            image = np.random.normal(0.3, 0.2, shape)
            image += 0.2 * self._wave_pattern(self.image_size)

        # Clip to valid range
        image = np.clip(image, 0, 1)

        # Convert to PIL Image
        image = Image.fromarray((image * 255).astype(np.uint8))

        if self.transform:
            image = self.transform(image)

        return image, label

# Preprocessing pipelines. Both resize to 224x224, convert to a tensor and
# normalise with the mean/std values commonly paired with ImageNet-pretrained
# torchvision models; only the training pipeline adds random augmentation.
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=90),
    transforms.ColorJitter(
        brightness=0.2,
        contrast=0.2,
        saturation=0.2,
        hue=0.1,
    ),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Synthetic train/validation splits (training set is built first)
train_dataset = SyntheticPathologyDataset(num_samples=800, transform=train_transform)
val_dataset = SyntheticPathologyDataset(num_samples=200, transform=val_transform)

# Batched loaders; only the training loader shuffles
batch_size = 32
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

print(
    f"📊 Dataset created:",
    f"   Training samples: {len(train_dataset)}",
    f"   Validation samples: {len(val_dataset)}",
    f"   Batch size: {batch_size}",
    sep="\n",
)

In [None]:
# Training setup and functions
def train_model(model, train_loader, val_loader, num_epochs=10, learning_rate=0.001):
    """Train ``model`` and record per-epoch loss and accuracy on both splits.

    Uses cross-entropy loss, Adam (weight decay 1e-4) and a StepLR schedule
    that multiplies the learning rate by 0.1 every 5 epochs. Batches are
    moved to the device the model's parameters already live on.

    Args:
        model: ``nn.Module`` producing class logits of shape (batch, classes).
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        num_epochs: Number of passes over ``train_loader``.
        learning_rate: Initial Adam learning rate.

    Returns:
        dict with keys ``'train_losses'``, ``'val_losses'``,
        ``'train_accuracies'`` and ``'val_accuracies'``; each is a list with
        one entry per epoch. Losses are per-sample means, accuracies are
        percentages.

    Raises:
        ValueError: If either loader yields no samples during an epoch.
    """

    # Follow the model's own device so inputs always match the weights
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    # Loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

    # Training history
    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []

    print("🚀 Starting training...")

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss_sum = 0.0
        train_correct = 0
        train_total = 0

        with tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}') as pbar:
            for images, labels in pbar:
                images, labels = images.to(run_device), labels.to(run_device)

                # Forward pass
                outputs = model(images)
                loss = criterion(outputs, labels)

                # Backward pass
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Statistics. The criterion returns the batch mean, so weight
                # it by batch size; a short final batch then counts correctly.
                batch_count = labels.size(0)
                train_loss_sum += loss.item() * batch_count
                train_total += batch_count
                train_correct += (outputs.argmax(dim=1) == labels).sum().item()

                # Update progress bar
                pbar.set_postfix({
                    'Loss': f'{loss.item():.4f}',
                    'Acc': f'{100.*train_correct/train_total:.2f}%'
                })

        # Validation phase
        model.eval()
        val_loss_sum = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(run_device), labels.to(run_device)
                outputs = model(images)
                loss = criterion(outputs, labels)

                batch_count = labels.size(0)
                val_loss_sum += loss.item() * batch_count
                val_total += batch_count
                val_correct += (outputs.argmax(dim=1) == labels).sum().item()

        if train_total == 0 or val_total == 0:
            raise ValueError(
                "train_loader and val_loader must each yield at least one sample"
            )

        # Calculate epoch metrics (per-sample averages)
        train_loss = train_loss_sum / train_total
        val_loss = val_loss_sum / val_total
        train_acc = 100. * train_correct / train_total
        val_acc = 100. * val_correct / val_total

        # Store history
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accuracies.append(train_acc)
        val_accuracies.append(val_acc)

        # Update learning rate
        scheduler.step()

        print(f'Epoch [{epoch+1}/{num_epochs}]:')
        print(f'  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'  Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')
        print('-' * 50)

    return {
        'train_losses': train_losses,
        'val_losses': val_losses,
        'train_accuracies': train_accuracies,
        'val_accuracies': val_accuracies
    }

# Short demo run of the custom network: 3 epochs only
print("🏋️‍♂️ Training custom PathologyNet...")
history = train_model(
    model,
    train_loader,
    val_loader,
    num_epochs=3,
    learning_rate=0.001,
)

In [None]:
# Visualize training progress
def plot_training_history(history):
    """Show side-by-side loss and accuracy curves for training and validation.

    Args:
        history: dict with per-epoch lists under ``'train_losses'``,
            ``'val_losses'``, ``'train_accuracies'`` and ``'val_accuracies'``.
    """
    epoch_axis = range(1, len(history['train_losses']) + 1)
    _, panels = plt.subplots(1, 2, figsize=(15, 5))

    # (history key suffix, metric name used in labels/title, y-axis label)
    layout = (
        ('losses', 'Loss', 'Loss'),
        ('accuracies', 'Accuracy', 'Accuracy (%)'),
    )

    for panel, (suffix, metric, y_label) in zip(panels, layout):
        # Blue = training curve, red = validation curve
        panel.plot(epoch_axis, history[f'train_{suffix}'], 'bo-', label=f'Training {metric}')
        panel.plot(epoch_axis, history[f'val_{suffix}'], 'ro-', label=f'Validation {metric}')
        panel.set_title(f'Training and Validation {metric}', fontweight='bold')
        panel.set_xlabel('Epoch')
        panel.set_ylabel(y_label)
        panel.legend()
        panel.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

# Draw the loss/accuracy curves for the run above
plot_training_history(history)

# Report the last epoch's accuracy on both splits
final_train_acc, final_val_acc = (
    history['train_accuracies'][-1],
    history['val_accuracies'][-1],
)
print(f"\n🏆 Final Performance:")
print(f"   Training Accuracy: {final_train_acc:.2f}%")
print(f"   Validation Accuracy: {final_val_acc:.2f}%")

## 🎯 Exercise: CNN Architecture Challenge

Design and implement your own CNN architecture for pathology classification:

1. **Custom Architecture**: Design a CNN with at least 4 convolutional blocks
2. **Transfer Learning**: Compare performance with pre-trained models
3. **Architecture Analysis**: Count parameters and analyze computational complexity
4. **Augmentation Strategy**: Implement pathology-specific data augmentation

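### Expected Performance
Your CNN should aim for the following targets:
- **Training accuracy**: >80% within 10 epochs
- **Validation accuracy**: >75% (avoid overfitting)
- **Parameter efficiency**: <10M parameters
- **Convergence speed**: Loss should decrease steadily

Note: the automated validation cell below enforces looser minimum thresholds (training accuracy >60%, validation accuracy >50%, fewer than 20M parameters, and a train/validation accuracy gap under 30 points), so passing it does not by itself mean these targets were met.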

### Advanced Challenge
Implement attention mechanisms or multi-scale feature fusion!

In [None]:
# 🎯 VALIDATION: CNN architecture requirements
def validate_cnn_architecture(model, history):
    """Check a trained model against parameter-count and accuracy thresholds.

    Prints a short report, then verifies, in order: fewer than 20M total
    parameters, final training accuracy above 60%, final validation accuracy
    above 50%, and a train-minus-validation accuracy gap below 30 points.

    Args:
        model: ``nn.Module`` whose parameters are counted.
        history: dict with per-epoch accuracy lists under
            ``'train_accuracies'`` and ``'val_accuracies'``; only the last
            entry of each is used.

    Returns:
        True when every check passes.

    Raises:
        AssertionError: On the first failed check, with a message naming it.
    """

    print("🧠 Validating CNN architecture and performance...")

    # Count parameters
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    # Get final performance
    final_train_acc = history['train_accuracies'][-1]
    final_val_acc = history['val_accuracies'][-1]

    # Check for overfitting
    overfitting_gap = final_train_acc - final_val_acc

    print(f"📊 Architecture Analysis:")
    print(f"   Total Parameters: {total_params:,}")
    print(f"   Trainable Parameters: {trainable_params:,}")
    print(f"   Final Training Accuracy: {final_train_acc:.2f}%")
    print(f"   Final Validation Accuracy: {final_val_acc:.2f}%")
    print(f"   Overfitting Gap: {overfitting_gap:.2f}%")

    # Validation checks. Raised explicitly rather than via `assert`
    # statements, which are removed when Python runs with -O and would let
    # every check pass silently.
    checks = [
        (total_params < 20_000_000, f"Too many parameters: {total_params:,}"),
        (final_train_acc > 60.0, f"Training accuracy too low: {final_train_acc:.2f}%"),
        (final_val_acc > 50.0, f"Validation accuracy too low: {final_val_acc:.2f}%"),
        (overfitting_gap < 30.0, f"Severe overfitting detected: {overfitting_gap:.2f}%"),
    ]
    for passed, message in checks:
        if not passed:
            raise AssertionError(message)

    print("\n🎉 CNN architecture validation passed!")
    print("🚀 Ready for next tutorial: Data Augmentation Pipeline")

    return True

# Run validation on the custom PathologyNet (`model`) using the metrics
# recorded in `history`; raises AssertionError if any threshold is missed
validate_cnn_architecture(model, history)

## 📚 Summary

You've successfully designed CNNs for digital pathology:

1. **Custom Architecture**: Built PathologyNet with batch normalization and dropout
2. **Transfer Learning**: Implemented pre-trained backbone adaptation
3. **Training Pipeline**: Complete training loop with validation
4. **Performance Monitoring**: Training history visualization and analysis

### Key Architecture Principles
- **Batch Normalization**: Stabilizes training and improves convergence
- **Dropout**: Reduces overfitting (`Dropout2d` in the convolutional blocks, `Dropout` in the fully connected layers)  
- **Adaptive Pooling**: Handles variable input sizes
- **Transfer Learning**: Leverages pre-trained ImageNet features

### Best Practices for Pathology CNNs
✅ **Use data augmentation** extensively for limited medical data  
✅ **Monitor overfitting** carefully with validation metrics  
✅ **Consider class imbalance** in loss function design  
✅ **Validate on external datasets** before clinical deployment  

### Next Steps
- **Tutorial 2**: Advanced data augmentation techniques
- **Tutorial 3**: Model training optimization and regularization
- **Tutorial 4**: Hyperparameter tuning and architecture search

🎓 **Excellent!** You've mastered CNN design for digital pathology applications!