<a href="https://colab.research.google.com/github/ZCalkins/CIFAR100-Multi-Model-Ablation-Analysis/blob/main/notebooks/cnn_ablation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Dependencies

In [None]:
from dataclasses import dataclass, field
from datetime import datetime
import os

from typing import List, Optional, Tuple

import torch
from torch import nn
from torch.nn.modules.activation import LeakyReLU
from torch.nn.modules.batchnorm import BatchNorm2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

import torchvision
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor, v2

import matplotlib.pyplot as plt

# Report the installed PyTorch and torchvision versions, one per line
print(torch.__version__, torchvision.__version__, sep="\n")

2.2.1+cu121
0.17.1+cu121


## Set Global Device Defaults

In [None]:
# Pick the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Make the selected device the global default for tensors created from here on.
torch.set_default_device(device)

## Organize/Create SummaryWriters for TensorBoard

In [None]:
def create_writer(experiment_name: str,
                  model_name: str,
                  extra: Optional[str] = None) -> SummaryWriter:
    """Create a TensorBoard ``SummaryWriter`` that logs to a dated run directory.

    The log directory is ``runs/<YYYY-MM-DD>/<experiment_name>/<model_name>``,
    with ``<extra>`` appended as a final path component when it is given.

    Args:
        experiment_name: Path component placed directly after the date.
        model_name: Path component placed after the experiment name.
        extra: Optional final path component. ``None`` or an empty string
            leaves it out.

    Returns:
        A ``SummaryWriter`` constructed with ``log_dir`` set to the path above.
    """
    # NOTE: the return annotation names the class and must never call it;
    # annotations are evaluated when the function is defined, so a call there
    # would build an extra writer as a side effect of running this cell.

    # Only the date is used, so every run from the same day shares a parent folder.
    date_stamp = datetime.now().strftime("%Y-%m-%d")

    path_parts = ["runs", date_stamp, experiment_name, model_name]
    if extra:
        path_parts.append(extra)
    log_dir = os.path.join(*path_parts)

    print(f"Created SummaryWriter, saving to: {log_dir}")
    return SummaryWriter(log_dir=log_dir)

## Data Augmentations

In [None]:
# NOTE(review): the Normalize mean/std values below look like the commonly used
# ImageNet channel statistics — TODO confirm they are intended for CIFAR-100.

# Augmented pipeline ending in a flat (1-D) tensor:
# random resized crop to 224 -> random horizontal flip -> AutoAugment ->
# float32 image tensor (values scaled via scale=True) -> normalize -> flatten.
base_transform_flat = transforms.Compose([
    v2.RandomResizedCrop(224),
    v2.RandomHorizontalFlip(),
    v2.AutoAugment(),
    v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)]),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    # torch.flatten is passed directly; Lambda calls it with the tensor.
    transforms.Lambda(torch.flatten)
])

# Same augmented pipeline, but the image keeps its dimensions (no flatten step).
base_transform = transforms.Compose([
    v2.RandomResizedCrop(224),
    v2.RandomHorizontalFlip(),
    v2.AutoAugment(),
    v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)]),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

# No augmentation and no normalization: convert to a float32 image tensor and
# flatten it. The flatten step must receive the tensor as its argument; calling
# torch.flatten() with no input raises a TypeError on the first sample.
base_transform_flat_no_augment = transforms.Compose([
    v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)]),
    transforms.Lambda(torch.flatten)
])

## Datasets

**TODO:** Change the datasets. Make train data and test data for the Gated MLP (one-dimensional) as well as for the ViT and CNN architectures.

In [None]:
def _load_cifar100(train, transform):
    """Build a CIFAR-100 dataset rooted at ``data`` with ``download=True``.

    Args:
        train: Passed through as the dataset's ``train`` flag.
        transform: Transform applied to each image.
    """
    return datasets.CIFAR100(
        root="data",
        train=train,
        download=True,
        transform=transform,
        target_transform=None,
    )


# Training split (train=True), one dataset per input format.
train_data_one_dim = _load_cifar100(True, base_transform_flat_no_augment)
train_data_augment_one_dim = _load_cifar100(True, base_transform_flat)
train_data_two_dim = _load_cifar100(True, ToTensor())
train_data_augment_two_dim = _load_cifar100(True, base_transform)

# Test split (train=False); no augmented variants are created.
test_data_one_dim = _load_cifar100(False, base_transform_flat_no_augment)
test_data_two_dim = _load_cifar100(False, ToTensor())

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to data/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:12<00:00, 13014168.94it/s]


Extracting data/cifar-100-python.tar.gz to data
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Class names as exposed by the dataset's `classes` attribute
# (presumably index-aligned with the integer targets — TODO confirm).
class_names = train_data_two_dim.classes

## DataLoaders

In [None]:
BATCH_SIZE = 32  # samples per batch, shared by every loader below

# Naming convention: `<split>_dataloader_<one_dim|two_dim>[_augmented]` wraps the
# dataset with the matching name. Training loaders shuffle; test loaders do not.
#
# NOTE(review): a global default device is set earlier in this notebook. If
# shuffling raises a generator/device mismatch on a CUDA runtime, pass
# `generator=torch.Generator(device=device)` to the shuffled loaders — TODO confirm.

# --- Flattened (1-D) inputs ---
train_dataloader_one_dim = DataLoader(dataset=train_data_one_dim,
                                      batch_size=BATCH_SIZE,
                                      shuffle=True)

train_dataloader_one_dim_augmented = DataLoader(dataset=train_data_augment_one_dim,
                                                batch_size=BATCH_SIZE,
                                                shuffle=True)

test_dataloader_one_dim = DataLoader(dataset=test_data_one_dim,
                                     batch_size=BATCH_SIZE,
                                     shuffle=False)

# --- Image-shaped inputs ---
# The plain loader wraps the plain dataset (as the one_dim loaders do) and
# shuffles like every other training loader; the augmented dataset gets its own
# explicitly named loader.
train_dataloader_two_dim = DataLoader(dataset=train_data_two_dim,
                                      batch_size=BATCH_SIZE,
                                      shuffle=True)

train_dataloader_two_dim_augmented = DataLoader(dataset=train_data_augment_two_dim,
                                                batch_size=BATCH_SIZE,
                                                shuffle=True)

test_dataloader_two_dim = DataLoader(dataset=test_data_two_dim,
                                     batch_size=BATCH_SIZE,
                                     shuffle=False)

## Creating Layer and Model Templates

### 1. CNN Architecture

In [None]:
# CNN Layer Configuration

@dataclass
class CNNLayerConfig:
    """Declarative settings for a single convolutional layer.

    Only the field declarations live here; how each option is interpreted is
    up to the code that builds the layer from this config. The three channel /
    kernel fields are required, everything else has a default.
    """
    in_channels: int  # required
    out_channels: int  # required
    kernel_size: int  # required
    stride: int = 1
    padding: Optional[int] = None  # meaning of None is decided by the builder — TODO confirm
    use_batch_norm: bool = False
    use_pool: bool = False
    pool_size: Optional[int] = None  # presumably only relevant when use_pool is True — TODO confirm
    pool_stride: Optional[int] = None  # presumably only relevant when use_pool is True — TODO confirm
    pool_type: Optional[str] = None  # accepted values are not defined here — TODO confirm
    use_dropout: bool = False
    dropout_rate: Optional[float] = None  # presumably only relevant when use_dropout is True — TODO confirm

# CNN Model Configuration

@dataclass
class CNNModelConfig:
    """Declarative settings for a whole CNN model and its training run.

    Only ``model_name`` is required. ``layers`` defaults to a fresh empty list
    for every instance (via ``default_factory``), so instances never share it.
    """
    model_name: str  # required
    layers: List[CNNLayerConfig] = field(default_factory=list)  # per-layer configs
    input_shape: Tuple[int, int, int] = (3, 32, 32)  # presumably (channels, height, width) — TODO confirm
    output_shape: int = 100  # presumably the number of output classes — TODO confirm
    optimizer: str = 'adam'  # identifier only; the mapping to an optimizer is not defined here
    learning_rate: float = 0.001
    batch_size: int = 64
    num_epochs: int = 10

### 2. ViT Architecture

In [None]:
# ViT Layer Configuration

@dataclass
class ViTLayerConfig:
    """Declarative settings for a single ViT layer.

    Only the field declarations live here; how each option is interpreted is
    up to the code that builds the layer from this config. The first five
    fields are required, the layer-norm options have defaults.
    """
    num_heads: int  # required
    head_dim: int  # required
    mlp_dim: int  # required
    dropout_rate: float  # required
    attention_dropout_rate: float  # required
    use_layer_norm: bool = True
    layer_norm_eps: Optional[float] = 1e-6  # presumably only relevant when use_layer_norm is True — TODO confirm

# ViT Model Configuration

@dataclass
class ViTModelConfig:
    """Declarative settings for a whole ViT model and its training run.

    Every architecture field (``model_name`` through ``attention_dropout_rate``)
    is required; only the training options at the end have defaults.
    """
    model_name: str
    image_size: int
    patch_size: int
    num_channels: int
    hidden_dim: int
    num_layers: int
    num_heads: int
    mlp_dim: int
    num_classes: int
    dropout_rate: float
    attention_dropout_rate: float
    optimizer: str = 'adam'  # identifier only; the mapping to an optimizer is not defined here
    learning_rate: float = 0.001
    batch_size: int = 64
    num_epochs: int = 10

### 3. Gated MLP Architecture

In [None]:
# Gated MLP Layer Configuration

@dataclass
class GatedMLPLayerConfig:
    """Declarative settings for a single gated-MLP layer.

    All four fields are required. How each option is interpreted is up to the
    code that builds the layer from this config.
    """
    input_dim: int
    output_dim: int
    use_gate: bool
    dropout_rate: float

# Gated MLP Model Configuration

@dataclass
class GatedMLPModelConfig:
    """Declarative settings for a whole gated-MLP model and its training run.

    ``model_name``, ``input_dim`` and ``output_dim`` are required. ``layers``
    defaults to a fresh empty list for every instance (via ``default_factory``).

    The ``@dataclass`` decorator is essential: without it no ``__init__`` is
    generated, and ``layers`` would be a raw ``dataclasses.Field`` object
    rather than a list.
    """
    model_name: str
    input_dim: int
    output_dim: int
    layers: List[GatedMLPLayerConfig] = field(default_factory=list)  # per-layer configs
    optimizer: str = 'adam'  # identifier only; the mapping to an optimizer is not defined here
    learning_rate: float = 0.001
    batch_size: int = 64
    num_epochs: int = 10
    dropout_rate: float = 0.1

## Instantiate Model, Configurations, Optimizer and Loss Function