# Module 7: Computer Vision - Exercise Solutions

This notebook contains solutions for all "Try It Yourself" exercises and challenges from Module 7.

---

## Table of Contents

1. [Task 7.1 Solutions - CNN Architectures](#task-71)
2. [Task 7.2 Solutions - Transfer Learning](#task-72)
3. [Task 7.3 Solutions - Object Detection](#task-73)
4. [Task 7.4 Solutions - Segmentation](#task-74)
5. [Task 7.5 Solutions - Vision Transformer](#task-75)
6. [Task 7.6 Solutions - SAM](#task-76)

In [None]:
# Common imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from typing import Tuple, List, Dict

# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Device: {device}")

---

<a id='task-71'></a>
## Task 7.1 Solutions: CNN Architecture Study

### Exercise 1: LeNet with MaxPooling

In [None]:
class LeNet5_MaxPool(nn.Module):
    """
    LeNet-5 variant that downsamples with max pooling instead of average pooling.

    Max pooling keeps only the strongest activation in each window
    ("was there an edge here?"), whereas average pooling blends the window
    ("how much edge on average?").

    The first fully connected layer expects 16 * 5 * 5 features, which is
    what a 3-channel 32x32 input produces after the two conv/pool stages
    (32 -> 28 -> 14 -> 10 -> 5).

    Args:
        num_classes: Width of the final linear layer (number of logits).
    """

    def __init__(self, num_classes: int = 10):
        super().__init__()

        # Feature extractor: two 5x5 convolutions that share one 2x2 max-pool.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)

        # Classifier: 400 -> 120 -> 84 -> num_classes.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw class logits of shape (batch, num_classes)."""
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))

        # Collapse everything except the batch dimension for the linear layers.
        flat = features.view(features.size(0), -1)

        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Smoke test: push one 32x32 RGB image through the network and report its size.
model = LeNet5_MaxPool()
x = torch.randn((1, 3, 32, 32))
print(f"LeNet5_MaxPool output shape: {model(x).shape}")
print(f"Parameters: {sum(map(torch.numel, model.parameters())):,}")

### Exercise 2: Compare Gradient Flow in ResNet vs Plain Network

In [None]:
class PlainBlock(nn.Module):
    """Two conv + batch-norm stages applied back to back, with no shortcut."""

    def __init__(self, channels):
        super().__init__()
        # 3x3 kernels with padding 1 keep the spatial size unchanged, and the
        # channel count is the same on both sides of every layer.
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(channels)

    def forward(self, x):
        hidden = self.conv1(x)
        hidden = F.relu(self.bn1(hidden))
        hidden = self.bn2(self.conv2(hidden))
        # The input is never added back: this is the baseline without a skip path.
        return F.relu(hidden)


class ResidualBlock(nn.Module):
    """Two conv + batch-norm stages whose result is added to the block input."""

    def __init__(self, channels):
        super().__init__()
        # Shape-preserving layers (3x3, padding 1, same channel count) so the
        # output can be summed with the untouched input.
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(channels)

    def forward(self, x):
        residual = self.conv1(x)
        residual = F.relu(self.bn1(residual))
        residual = self.bn2(self.conv2(residual))
        # Identity shortcut: add the input before the final activation.
        return F.relu(residual + x)


def compare_gradient_flow(num_blocks: int = 10):
    """
    Measure how much gradient reaches the input of a plain vs. a residual stack.

    Builds `num_blocks` PlainBlock modules and `num_blocks` ResidualBlock
    modules (64 channels each), feeds each stack its own random
    1 x 64 x 32 x 32 input, backpropagates the sum of the output, and
    reports the mean absolute gradient at the input.

    The exercise uses this to illustrate that skip connections help
    gradients survive as depth grows. The networks and inputs are randomly
    initialised and no seed is set, so values vary between calls.

    Args:
        num_blocks: Number of blocks in each stack.

    Returns:
        Tuple (grad_plain, grad_res) of Python floats.
    """
    channels = 64

    def input_grad_magnitude(net, inp):
        # Backpropagate a scalar so that inp.grad is populated.
        net(inp).sum().backward()
        return inp.grad.abs().mean().item()

    # Build both stacks before drawing the inputs (keeps RNG consumption order).
    plain_net = nn.Sequential(*(PlainBlock(channels) for _ in range(num_blocks)))
    residual_net = nn.Sequential(*(ResidualBlock(channels) for _ in range(num_blocks)))

    # Each stack gets its own leaf tensor with gradient tracking enabled.
    plain_input = torch.randn(1, channels, 32, 32, requires_grad=True)
    residual_input = torch.randn(1, channels, 32, 32, requires_grad=True)

    grad_plain = input_grad_magnitude(plain_net, plain_input)
    grad_res = input_grad_magnitude(residual_net, residual_input)
    return grad_plain, grad_res

# Print a small table of input-gradient magnitudes for increasing depth.
print(
    "Gradient Magnitude Comparison (Higher is Better)",
    "=" * 50,
    f"{'Depth':<10} {'Plain Network':<20} {'ResNet':<20}",
    "-" * 50,
    sep="\n",
)

for depth in (5, 10, 20, 30):
    grad_plain, grad_res = compare_gradient_flow(depth)
    print(f"{depth:<10} {grad_plain:<20.6f} {grad_res:<20.6f}")

print("\nConclusion: ResNet maintains gradient flow even at great depth!")

### Challenge: Squeeze-and-Excitation ResNet

In [None]:
class SEBlock(nn.Module):
    """
    Squeeze-and-Excitation block.

    Learns one gate in (0, 1) per channel and rescales the input with it.
    Paper: "Squeeze-and-Excitation Networks" (Hu et al., 2018)

    Args:
        channels: Number of channels of the feature map the block receives.
        reduction: Factor by which the bottleneck of the gating MLP is
            narrower than `channels`. The bottleneck is clamped to at least
            one unit, so `channels < reduction` still yields a learnable gate.
    """
    def __init__(self, channels: int, reduction: int = 16):
        super(SEBlock, self).__init__()
        # Without the clamp, channels // reduction is 0 for narrow layers,
        # which makes the gate a constant 0.5 with nothing to learn.
        hidden = max(1, channels // reduction)

        self.squeeze = nn.AdaptiveAvgPool2d(1)
        self.excite = nn.Sequential(
            nn.Linear(channels, hidden, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, channels, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return `x` scaled channel-wise by the learned gates (same shape as `x`)."""
        b, c, _, _ = x.size()
        # Squeeze: global average pooling -> one scalar per channel
        y = self.squeeze(x).view(b, c)
        # Excite: bottleneck MLP + sigmoid -> one gate per channel
        y = self.excite(y).view(b, c, 1, 1)
        # Scale: broadcast the gates over the spatial dimensions
        return x * y


class SEBasicBlock(nn.Module):
    """
    ResNet basic block (two 3x3 convolutions) with SE channel attention.

    The SE gate is applied to the residual branch before it is summed with
    the shortcut.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by the block.
        stride: Stride of the first convolution (and of the projection
            shortcut, when one is needed).
        reduction: Reduction factor forwarded to SEBlock.
    """
    expansion = 1

    def __init__(self, in_channels: int, out_channels: int, stride: int = 1, reduction: int = 16):
        super().__init__()

        # Residual branch: conv-bn-relu, conv-bn, then channel attention.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.se = SEBlock(out_channels, reduction)

        # Shortcut: identity when shapes already agree, otherwise a 1x1
        # conv + batch norm that matches both resolution and channel count.
        needs_projection = stride != 1 or in_channels != out_channels
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        branch = self.conv1(x)
        branch = F.relu(self.bn1(branch))
        branch = self.bn2(self.conv2(branch))
        branch = self.se(branch)  # reweight channels before the sum
        branch += self.shortcut(x)
        return F.relu(branch)


class SEResNet18(nn.Module):
    """
    ResNet-18 layout (four stages of two blocks each) built from SEBasicBlock.

    The stem is a single 3x3 stride-1 convolution with no pooling, so the
    first stage runs at the input resolution.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes: int = 10):
        super().__init__()

        # Running channel count consumed and updated by _make_layer.
        self.in_channels = 64

        # Stem
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        # Four stages; every stage after the first halves the resolution.
        self.layer1 = self._make_layer(64, num_blocks=2, stride=1)
        self.layer2 = self._make_layer(128, num_blocks=2, stride=2)
        self.layer3 = self._make_layer(256, num_blocks=2, stride=2)
        self.layer4 = self._make_layer(512, num_blocks=2, stride=2)

        # Head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, out_channels: int, num_blocks: int, stride: int) -> nn.Sequential:
        """Build one stage; only its first block uses `stride`, the rest use 1."""
        strides = [stride]
        strides.extend([1] * (num_blocks - 1))

        blocks = []
        for block_stride in strides:
            blocks.append(SEBasicBlock(self.in_channels, out_channels, block_stride))
            # Every following block starts from this stage's width.
            self.in_channels = out_channels
        return nn.Sequential(*blocks)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape (batch, num_classes)."""
        x = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        pooled = self.avgpool(x)
        return self.fc(pooled.view(pooled.size(0), -1))

# Smoke test: one 32x32 RGB image through SE-ResNet, plus its parameter count.
se_resnet = SEResNet18(num_classes=10)
x = torch.randn((1, 3, 32, 32))
print(f"SE-ResNet18 output: {se_resnet(x).shape}")
print(f"Parameters: {sum(map(torch.numel, se_resnet.parameters())):,}")

---

<a id='task-72'></a>
## Task 7.2 Solutions: Transfer Learning

### Achieving >90% Accuracy on CIFAR-100

In [None]:
# Recipe for pushing past 90% on CIFAR-100:
#   1. A larger backbone (EfficientNet-B3 or ConvNeXt)
#   2. The full training set rather than a subset
#   3. A longer schedule (50+ epochs)
#   4. Regularisation tricks: Mixup and label smoothing
#
# The configuration below encodes that recipe.

training_config = dict(
    model='efficientnet_b3',       # larger model
    dataset_subset=None,           # None = full dataset (50,000 images)
    epochs=50,
    batch_size=32,
    base_lr=1e-4,
    weight_decay=0.1,
    warmup_epochs=5,
    label_smoothing=0.1,
    mixup_alpha=0.2,
    strategy='gradual_unfreeze',
)

print("Configuration for >90% accuracy on CIFAR-100:")
for key, value in training_config.items():
    print(f"  {key}: {value}")

# Mixup augmentation implementation
def mixup_data(x, y, alpha=0.2):
    """
    Blend every sample in a batch with another, randomly chosen, sample.

    Args:
        x: Batch of inputs; the first dimension is the batch dimension.
        y: Targets, indexable along the same batch dimension.
        alpha: Parameter of the Beta(alpha, alpha) distribution the mixing
            weight is drawn from. Values <= 0 disable mixing (weight 1).

    Returns:
        (mixed_x, y_a, y_b, lam): the blended inputs, the original targets,
        the targets of the partner samples, and the weight given to the
        original sample.
    """
    # One mixing weight is shared by the whole batch.
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1

    # A random permutation pairs each sample with its partner.
    perm = torch.randperm(x.size(0)).to(x.device)
    partner_x = x[perm, :]

    mixed_x = lam * x + (1 - lam) * partner_x
    return mixed_x, y, y[perm], lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """
    Combine the losses against both Mixup targets.

    Args:
        criterion: Callable taking (pred, target) and returning a loss.
        pred: Model output for the mixed batch.
        y_a: Targets of the original samples.
        y_b: Targets of the partner samples.
        lam: Weight of the `y_a` loss; the `y_b` loss gets `1 - lam`.

    Returns:
        lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)
    """
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b

---

<a id='task-73'></a>
## Task 7.3 Solutions: Object Detection

### Detecting Specific Classes

In [None]:
# Example: Custom image detection
# Note: Run this after installing ultralytics

def detect_custom_image(image_url: str, classes_to_detect: List[str] = None):
    """
    Download an image, run YOLOv8 on it, and display the detections.

    The image is stored temporarily as ``custom_image.jpg`` in the current
    working directory and removed again before the function returns, whether
    or not detection succeeded.

    Args:
        image_url: URL of image to process
        classes_to_detect: Optional list of class names to filter. Names the
            model does not know are ignored. ``None`` or an empty list means
            no filtering.

    Returns:
        The first result object returned by the model, or ``None`` when the
        ``ultralytics`` package is not installed.
    """
    import urllib.request
    from pathlib import Path

    # Check for the optional dependency first, so that nothing is downloaded
    # or written to disk when detection cannot run anyway.
    try:
        from ultralytics import YOLO
    except ImportError:
        print("Please install ultralytics: pip install ultralytics")
        return None

    image_path = Path('custom_image.jpg')
    try:
        # Download image
        urllib.request.urlretrieve(image_url, image_path)

        # Load YOLO
        model = YOLO('yolov8s.pt')

        # Translate class names to the model's integer ids when filtering
        if classes_to_detect:
            class_to_idx = {name: idx for idx, name in model.names.items()}
            class_indices = [class_to_idx[c] for c in classes_to_detect if c in class_to_idx]
            results = model(str(image_path), classes=class_indices)
        else:
            results = model(str(image_path))

        # Display. The channel axis is reversed because plot() is documented
        # by Ultralytics to return a BGR array, while matplotlib expects RGB.
        annotated = results[0].plot()
        plt.figure(figsize=(12, 8))
        plt.imshow(annotated[:, :, ::-1])
        plt.title(f'Detections: {len(results[0].boxes)} objects found')
        plt.axis('off')
        plt.show()

        return results[0]
    finally:
        # Cleanup also runs when the download or the detection raised.
        if image_path.exists():
            image_path.unlink()

# Example usage (uncomment to run):
# results = detect_custom_image(
#     'https://images.unsplash.com/photo-1517849845537-4d257902454a',
#     classes_to_detect=['dog', 'person']
# )

---

<a id='task-74'></a>
## Task 7.4 Solutions: Segmentation

### Larger U-Net Architecture

In [None]:
class LargerUNet(nn.Module):
    """
    U-Net with a wider encoder than the reference 64-channel design.

    Channel widths as implemented below:
        Reference encoder: 64 -> 128 -> 256 -> 512 -> 1024
        This encoder:      128 -> 256 -> 512 -> 1024 -> 1024 (bottleneck)
        This decoder:      512 -> 256 -> 128 -> 64

    Note that the bottleneck keeps 1024 channels; it is not doubled to 2048.

    Inputs whose height or width is not a multiple of 16 are supported: each
    upsampled map is zero-padded to the size of its skip connection before
    the two are concatenated. Height and width must be at least 16 so that
    the four pooling steps never produce an empty map.

    Args:
        n_channels: Channels of the input image.
        n_classes: Number of output channels (one logit map per class).
    """

    def __init__(self, n_channels: int = 3, n_classes: int = 21):
        super(LargerUNet, self).__init__()

        def double_conv(in_ch, out_ch):
            # Two shape-preserving 3x3 conv + BN + ReLU stages.
            return nn.Sequential(
                nn.Conv2d(in_ch, out_ch, 3, padding=1, bias=False),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_ch, out_ch, 3, padding=1, bias=False),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(inplace=True)
            )

        # Encoder: every down step halves the resolution with a 2x2 max-pool.
        self.inc = double_conv(n_channels, 128)
        self.down1 = nn.Sequential(nn.MaxPool2d(2), double_conv(128, 256))
        self.down2 = nn.Sequential(nn.MaxPool2d(2), double_conv(256, 512))
        self.down3 = nn.Sequential(nn.MaxPool2d(2), double_conv(512, 1024))
        self.down4 = nn.Sequential(nn.MaxPool2d(2), double_conv(1024, 1024))

        # Decoder: each transposed conv doubles the resolution; the following
        # double_conv takes (upsampled channels + skip channels) as input.
        self.up1 = nn.ConvTranspose2d(1024, 512, 2, stride=2)
        self.conv_up1 = double_conv(1024 + 512, 512)

        self.up2 = nn.ConvTranspose2d(512, 256, 2, stride=2)
        self.conv_up2 = double_conv(512 + 256, 256)

        self.up3 = nn.ConvTranspose2d(256, 128, 2, stride=2)
        self.conv_up3 = double_conv(256 + 128, 128)

        self.up4 = nn.ConvTranspose2d(128, 64, 2, stride=2)
        self.conv_up4 = double_conv(128 + 64, 64)

        # 1x1 convolution to per-pixel class logits.
        self.outc = nn.Conv2d(64, n_classes, 1)

    @staticmethod
    def _merge(upsampled, skip):
        """Concatenate `upsampled` with `skip`, padding `upsampled` if it is smaller."""
        # Max-pooling floors odd sizes, so the upsampled map can be one pixel
        # short of the skip map in either spatial dimension.
        diff_h = skip.size(2) - upsampled.size(2)
        diff_w = skip.size(3) - upsampled.size(3)
        if diff_h or diff_w:
            upsampled = F.pad(
                upsampled,
                [diff_w // 2, diff_w - diff_w // 2, diff_h // 2, diff_h - diff_h // 2],
            )
        return torch.cat([upsampled, skip], dim=1)

    def forward(self, x):
        """Return logits of shape (batch, n_classes, H, W) for input (batch, n_channels, H, W)."""
        # Encoder (keep every level for the skip connections)
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)

        # Decoder: upsample, merge with the matching encoder level, refine
        x = self.conv_up1(self._merge(self.up1(x5), x4))
        x = self.conv_up2(self._merge(self.up2(x), x3))
        x = self.conv_up3(self._merge(self.up3(x), x2))
        x = self.conv_up4(self._merge(self.up4(x), x1))

        return self.outc(x)

# Smoke test: one 256x256 RGB image through the larger U-Net, plus its size.
large_unet = LargerUNet(n_classes=21)
x = torch.randn((1, 3, 256, 256))
print(f"Larger U-Net output: {large_unet(x).shape}")
print(f"Parameters: {sum(map(torch.numel, large_unet.parameters())):,}")

---

<a id='task-75'></a>
## Task 7.5 Solutions: Vision Transformer

### Comparing Different Patch Sizes

In [None]:
class PatchEmbedding(nn.Module):
    """
    Turn an image into a sequence of patch embeddings for ViT.

    A convolution whose kernel size and stride both equal `patch_size`
    projects each non-overlapping patch to `embed_dim` values.

    Args:
        img_size: Side length of the (square) input image.
        patch_size: Side length of each (square) patch.
        in_channels: Channels of the input image.
        embed_dim: Size of each patch embedding.
    """
    def __init__(self, img_size, patch_size, in_channels, embed_dim):
        super().__init__()
        patches_per_side = img_size // patch_size
        self.num_patches = patches_per_side ** 2
        self.projection = nn.Conv2d(
            in_channels, embed_dim, kernel_size=patch_size, stride=patch_size
        )

    def forward(self, x):
        """Map (B, C, H, W) to (B, num_patches, embed_dim)."""
        grid = self.projection(x)          # (B, D, H/ps, W/ps)
        tokens = grid.flatten(2)           # (B, D, N)
        return tokens.transpose(1, 2)      # (B, N, D)


def compare_patch_sizes(img_size: int = 32, embed_dim: int = 256):
    """
    Print a table showing how the patch size affects a ViT's token count.

    For each candidate patch size (2, 4, 8, 16) that divides `img_size`
    evenly, one row lists the number of patches, the sequence length
    including the CLS token, and the parameter count of the patch-embedding
    layer alone.

    Smaller patches = more tokens = more compute but potentially better accuracy.

    Args:
        img_size: Side length of the square input image.
        embed_dim: Embedding size used for the patch projection.
    """
    print(f"Patch Size Comparison for {img_size}x{img_size} images")
    print("=" * 60)
    print(f"{'Patch Size':<12} {'Num Patches':<15} {'Seq Length':<15} {'Params':<15}")
    print("-" * 60)

    # Patch sizes that do not tile the image exactly are left out of the table.
    usable_sizes = [size for size in (2, 4, 8, 16) if img_size % size == 0]

    for patch_size in usable_sizes:
        embedder = PatchEmbedding(img_size, patch_size, 3, embed_dim)
        num_patches = (img_size // patch_size) ** 2
        seq_length = num_patches + 1  # one extra position for the CLS token
        params = sum(weight.numel() for weight in embedder.parameters())

        print(f"{patch_size:<12} {num_patches:<15} {seq_length:<15} {params:,}")

# Print the table for 32x32 images with a 256-dimensional embedding.
compare_patch_sizes(img_size=32, embed_dim=256)

### DeiT with Distillation Token

In [None]:
class DeiT(nn.Module):
    """
    Data-efficient Image Transformer (DeiT).

    A ViT-style encoder with one extra learnable "distillation" token next
    to the usual CLS token. Each of the two tokens feeds its own linear
    classification head. The distillation head is meant to be supervised by
    a teacher model during training; the teacher and the distillation loss
    are not part of this class.
    Paper: "Training data-efficient image transformers" (Touvron et al., 2021)

    Args:
        img_size: Side length of the square input image.
        patch_size: Side length of each square patch.
        num_classes: Number of logits produced by each head.
        embed_dim: Token embedding size.
        depth: Number of transformer encoder layers.
        num_heads: Attention heads per encoder layer.
    """

    def __init__(
        self,
        img_size: int = 32,
        patch_size: int = 4,
        num_classes: int = 10,
        embed_dim: int = 256,
        depth: int = 6,
        num_heads: int = 8
    ):
        super().__init__()

        patches_per_side = img_size // patch_size
        # Sequence = all patches + CLS token + distillation token.
        seq_len = patches_per_side ** 2 + 2

        # Non-overlapping patches are embedded by a strided convolution.
        self.patch_embed = nn.Conv2d(3, embed_dim, kernel_size=patch_size, stride=patch_size)

        # Learnable prefix tokens and positional embedding (initialised below).
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.dist_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(torch.zeros(1, seq_len, embed_dim))

        # Transformer encoder with a 4x wide feed-forward and GELU activations.
        self.encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=embed_dim,
                nhead=num_heads,
                dim_feedforward=embed_dim * 4,
                dropout=0.1,
                activation='gelu',
                batch_first=True
            ),
            num_layers=depth,
        )
        self.norm = nn.LayerNorm(embed_dim)

        # One head per prefix token.
        self.head = nn.Linear(embed_dim, num_classes)
        self.dist_head = nn.Linear(embed_dim, num_classes)

        for param in (self.cls_token, self.dist_token, self.pos_embed):
            nn.init.trunc_normal_(param, std=0.02)

    def forward(self, x: torch.Tensor):
        """
        Classify a batch of images.

        Returns:
            In training mode, the tuple (cls_logits, dist_logits).
            In eval mode, a single tensor holding the mean of the two.
        """
        batch = x.shape[0]

        # (B, 3, H, W) -> (B, N, D)
        patches = self.patch_embed(x).flatten(2).transpose(1, 2)

        # Prefix order is [CLS, DIST], giving (B, N+2, D) after concatenation.
        prefix = torch.cat([self.cls_token, self.dist_token], dim=1).expand(batch, -1, -1)
        tokens = torch.cat([prefix, patches], dim=1) + self.pos_embed

        encoded = self.norm(self.encoder(tokens))

        cls_output = self.head(encoded[:, 0])        # position 0: CLS token
        dist_output = self.dist_head(encoded[:, 1])  # position 1: distillation token

        if self.training:
            return cls_output, dist_output

        # At inference the two heads are averaged into one prediction.
        return (cls_output + dist_output) / 2

# Smoke test for DeiT on a batch of two 32x32 RGB images.
deit = DeiT(img_size=32, patch_size=4, num_classes=10)
x = torch.randn((2, 3, 32, 32))

# train() returns the module itself; in training mode the forward pass
# yields one logit tensor per head.
cls_out, dist_out = deit.train()(x)
print(f"DeiT training outputs: cls={cls_out.shape}, dist={dist_out.shape}")

# In eval mode the two heads are averaged into a single tensor.
out = deit.eval()(x)
print(f"DeiT inference output: {out.shape}")
print(f"Parameters: {sum(map(torch.numel, deit.parameters())):,}")

---

<a id='task-76'></a>
## Task 7.6 Solutions: SAM

### Magic Wand Tool

In [None]:
class MagicWand:
    """
    Interactive "Magic Wand" tool using SAM.

    Allows iterative refinement with positive and negative clicks. After
    every click the predictor is asked again with all accumulated points;
    the highest-scoring mask becomes the current mask, and the other
    candidates stay available through grow() and shrink().
    """

    def __init__(self, sam_predictor):
        """
        Args:
            sam_predictor: SamPredictor instance (anything exposing
                `set_image(image)` and
                `predict(point_coords=, point_labels=, multimask_output=)`).
        """
        self.predictor = sam_predictor
        self.image = None  # populated by set_image(); required by visualize()
        self.reset()

    def reset(self):
        """Clear all clicks and masks (the image is kept)."""
        self.positive_points = []
        self.negative_points = []
        # One bool per click in chronological order (True = positive), so
        # that undo() removes the click that was actually made last.
        self._click_order = []
        self.current_mask = None
        self.all_masks = []  # Candidate masks from the most recent prediction

    def set_image(self, image):
        """Set the image to segment and discard clicks made on the previous one."""
        self.predictor.set_image(image)
        self.image = image
        self.reset()

    def click(self, point, is_positive=True):
        """
        Add a click point and update the mask.

        Args:
            point: (x, y) coordinates
            is_positive: True for "include", False for "exclude"

        Returns:
            Updated mask
        """
        if is_positive:
            self.positive_points.append(point)
        else:
            self.negative_points.append(point)
        self._click_order.append(bool(is_positive))

        self._update_mask()
        return self.current_mask

    def undo(self):
        """
        Undo the most recent click, positive or negative.

        Returns:
            The mask recomputed from the remaining clicks, or None when no
            clicks are left.
        """
        if self._click_order:
            was_positive = self._click_order.pop()
            bucket = self.positive_points if was_positive else self.negative_points
            if bucket:
                bucket.pop()
        # Fallback for points appended to the lists directly instead of via
        # click(): no order is known, so drop a positive point first.
        elif self.positive_points:
            self.positive_points.pop()
        elif self.negative_points:
            self.negative_points.pop()

        self._update_mask()
        return self.current_mask

    def grow(self):
        """Switch to the largest candidate mask (if several are available)."""
        if len(self.all_masks) > 1:
            # max() returns the first mask with the largest area on ties.
            self.current_mask = max(self.all_masks, key=lambda m: m.sum())
        return self.current_mask

    def shrink(self):
        """Switch to the smallest candidate mask (if several are available)."""
        if len(self.all_masks) > 1:
            # min() returns the first mask with the smallest area on ties.
            self.current_mask = min(self.all_masks, key=lambda m: m.sum())
        return self.current_mask

    def _update_mask(self):
        """Recompute the masks from all accumulated points."""
        if not self.positive_points and not self.negative_points:
            # Nothing to predict from. Also drop the stale candidates so that
            # grow()/shrink() cannot resurrect a mask from undone clicks.
            self.current_mask = None
            self.all_masks = []
            return

        all_points = self.positive_points + self.negative_points
        # Label 1 marks a positive point, 0 a negative one, in the same order.
        labels = [1] * len(self.positive_points) + [0] * len(self.negative_points)

        masks, scores, _ = self.predictor.predict(
            point_coords=np.array(all_points),
            point_labels=np.array(labels),
            multimask_output=True
        )

        # Store all masks for grow/shrink
        self.all_masks = list(masks)

        # Use highest scoring mask as default
        self.current_mask = masks[scores.argmax()]

    def visualize(self):
        """
        Show the clicks next to the current mask overlay.

        Raises:
            RuntimeError: If no image has been set yet.
        """
        if self.image is None:
            raise RuntimeError("Call set_image() before visualize().")

        fig, axes = plt.subplots(1, 2, figsize=(14, 6))

        # Image with points: green stars are positive, red crosses negative
        axes[0].imshow(self.image)
        for p in self.positive_points:
            axes[0].scatter(p[0], p[1], c='green', s=200, marker='*', edgecolors='white', linewidths=2)
        for p in self.negative_points:
            axes[0].scatter(p[0], p[1], c='red', s=200, marker='x', linewidths=3)
        axes[0].set_title(f'Clicks: {len(self.positive_points)} positive, {len(self.negative_points)} negative')
        axes[0].axis('off')

        # Mask overlay
        axes[1].imshow(self.image)
        if self.current_mask is not None:
            # RGBA layer that is fully transparent outside the mask.
            # Assumes current_mask is a boolean H x W array; verify against
            # the predictor in use.
            colored_mask = np.zeros((*self.current_mask.shape, 4))
            colored_mask[self.current_mask] = [0.3, 0.7, 0.3, 0.6]
            axes[1].imshow(colored_mask)
        axes[1].set_title('Current Mask')
        axes[1].axis('off')

        plt.tight_layout()
        plt.show()

# Confirmation banner followed by a short usage cheat sheet.
print("\n".join([
    "MagicWand class defined successfully!",
    "\nUsage:",
    "  wand = MagicWand(sam_predictor)",
    "  wand.set_image(image)",
    "  wand.click((x, y), is_positive=True)",
    "  wand.click((x, y), is_positive=False)  # Exclude region",
    "  wand.grow()  # Get larger mask",
    "  wand.shrink()  # Get smaller mask",
    "  wand.undo()  # Remove last click",
    "  wand.visualize()  # Show current state",
]))

---

## Summary

This notebook contains solutions for all exercises in Module 7:

1. **Task 7.1**: LeNet with MaxPool, gradient flow comparison, SE-ResNet
2. **Task 7.2**: Configuration for >90% on CIFAR-100, Mixup implementation
3. **Task 7.3**: Custom image detection with YOLO
4. **Task 7.4**: Larger U-Net architecture
5. **Task 7.5**: Patch size comparison, DeiT with distillation
6. **Task 7.6**: Interactive MagicWand tool with SAM

Remember: The best way to learn is to try implementing these yourself first!