In [1]:
import torch
import torchvision.transforms as T
from PIL import Image
import os
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.distributions as dist

In [2]:
preprocess = T.Compose([
    T.Resize((128, 128)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

class GaussianNoise:
    def __init__(self, mean=0.0, std=0.15):
        self.mean = mean
        self.std = std
    def __call__(self, tensor):
        return tensor + torch.normal(self.mean, self.std, tensor.size())

augmented_preprocess = T.Compose([
    T.Resize((128, 128)),
    T.RandomHorizontalFlip(p=0.5),
    T.RandomRotation(degrees=45),
    T.ToTensor(),
    GaussianNoise(mean=0.0, std=0.15),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

class MyDataset(Dataset):
    def __init__(self, transform=None, split="train"):
        self.imgs_path = os.path.join("../data/cats_and_dogs_filtered", split)
        self.data = [
            [os.path.join(class_path, img), class_name]
            for class_name in os.listdir(self.imgs_path)
            if os.path.isdir(class_path := os.path.join(self.imgs_path, class_name))
            for img in os.listdir(class_path) if img.endswith('.jpg')
        ]
        self.class_map = {"dogs": 0, "cats": 1}
        self.transform = transform

    def __len__(self): return len(self.data)
    def __getitem__(self, index):
        img_path, class_name = self.data[index]
        img = Image.open(img_path).convert('RGB')
        class_id = torch.tensor(self.class_map[class_name])
        return self.transform(img) if self.transform else img, class_id

class CustomDropout(nn.Module):
    def __init__(self, p=0.5): 
        super().__init__()
        self.p = p
    def forward(self, x):
        if self.training:
            mask = dist.Bernoulli(probs=1-self.p).sample(x.size()).to(x.device)
            return x * mask / (1-self.p)
        return x

def get_weight_magnitude(model):
    total_magnitude = sum(torch.norm(p, p=2).item() for p in model.parameters() if p.requires_grad)
    num_params = sum(1 for _ in model.parameters() if _.requires_grad)
    return total_magnitude / num_params if num_params > 0 else 0

In [3]:
class BaseCatsDogsCNN(nn.Module):
    def __init__(self, dropout_rate=0, dropout_class=nn.Dropout2d):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1), nn.BatchNorm2d(16), nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=3, padding=1), nn.BatchNorm2d(32), nn.ReLU(),
            nn.MaxPool2d(2), dropout_class(dropout_rate) if dropout_rate > 0 else nn.Identity(),
            nn.Conv2d(32, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(),
            nn.MaxPool2d(2), dropout_class(dropout_rate) if dropout_rate > 0 else nn.Identity(),
            nn.Flatten(), nn.Linear(64 * 32 * 32, 512), nn.ReLU(),
            nn.Dropout(dropout_rate) if dropout_rate > 0 else nn.Identity(),
            nn.Linear(512, 2)
        )
    def forward(self, x): return self.net(x)

def train_epoch(model, dataloader, criterion, optimizer, device, use_explicit_l2=False, lambda_l2=0.01):
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        if use_explicit_l2:
            l2_reg = sum(torch.norm(p, p=2) ** 2 for p in model.parameters() if p.requires_grad)
            loss += lambda_l2 * l2_reg
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    return running_loss / len(dataloader), 100 * correct / total

def evaluate_model(model, dataloader, criterion, device):
    model.eval()
    running_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return running_loss / len(dataloader), 100 * correct / total

def train_model(model_class, dropout_rate=0, dropout_class=nn.Dropout2d, weight_decay=0, 
                use_explicit_l2=False, patience=None, max_epochs=5, train_loader=None, val_loader=None):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    
    if train_loader is None:
        train_loader = DataLoader(MyDataset(preprocess, "train"), batch_size=32, shuffle=True)
    if val_loader is None:
        val_loader = DataLoader(MyDataset(preprocess, "validation"), batch_size=32, shuffle=False)
    
    model = model_class(dropout_rate, dropout_class).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=weight_decay)
    
    train_losses, train_accs, val_losses, val_accs, weight_mags = [], [], [], [], []
    best_val_loss, epochs_no_improve, best_model_state = float('inf'), 0, None
    
    for epoch in range(max_epochs):
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device, use_explicit_l2)
        val_loss, val_acc = evaluate_model(model, val_loader, criterion, device)
        weight_mag = get_weight_magnitude(model)
        
        train_losses.append(train_loss); train_accs.append(train_acc)
        val_losses.append(val_loss); val_accs.append(val_acc)
        weight_mags.append(weight_mag)
        
        print(f"Epoch {epoch+1}: Train Loss {train_loss:.4f}, Acc {train_acc:.2f}%, "
              f"Val Loss {val_loss:.4f}, Acc {val_acc:.2f}%, Weight Mag {weight_mag:.6f}")
        
        if patience:
            if val_loss < best_val_loss:
                best_val_loss, epochs_no_improve, best_model_state = val_loss, 0, model.state_dict()
            else:
                epochs_no_improve += 1
                if epochs_no_improve >= patience:
                    print(f"Early stopping at epoch {epoch+1}")
                    model.load_state_dict(best_model_state)
                    break
    
    return train_losses, train_accs, val_losses, val_accs, weight_mags

def print_comparison(name1, name2, res1, res2):
    print(f"\nResults Comparison:")
    for name, res in [(name1, res1), (name2, res2)]:
        print(f"{name}:")
        print(f"Final Train Loss: {res[0][-1]:.4f}, Acc: {res[1][-1]:.2f}%")
        print(f"Final Val Loss: {res[2][-1]:.4f}, Acc: {res[3][-1]:.2f}%")
    gap1, gap2 = res1[1][-1] - res1[3][-1], res2[1][-1] - res2[3][-1]
    print(f"\nOverfitting: {name1} Gap: {gap1:.2f}%, {name2} Gap: {gap2:.2f}%")

## Sample


In [4]:
print("Sample Question: Data Augmentation Experiment")
print("Training without Data Augmentation")
no_aug_results = train_model(BaseCatsDogsCNN, dropout_rate=0, max_epochs=5)

print("\nTraining with Data Augmentation")
train_loader_aug = DataLoader(MyDataset(augmented_preprocess, "train"), batch_size=32, shuffle=True)
val_loader_base = DataLoader(MyDataset(preprocess, "validation"), batch_size=32, shuffle=False)
aug_results = train_model(BaseCatsDogsCNN, dropout_rate=0, max_epochs=5, 
                          train_loader=train_loader_aug, val_loader=val_loader_base)

print_comparison("No Augmentation", "With Augmentation", no_aug_results, aug_results)

Sample Question: Data Augmentation Experiment
Training without Data Augmentation
Epoch 1: Train Loss 6.3497, Acc 57.65%, Val Loss 0.6801, Acc 59.60%, Weight Mag 4.019593
Epoch 2: Train Loss 0.5966, Acc 68.50%, Val Loss 0.7090, Acc 60.70%, Weight Mag 4.068218
Epoch 3: Train Loss 0.5609, Acc 71.40%, Val Loss 0.6990, Acc 64.50%, Weight Mag 4.116168
Epoch 4: Train Loss 0.4952, Acc 76.60%, Val Loss 0.6523, Acc 65.70%, Weight Mag 4.141978
Epoch 5: Train Loss 0.5041, Acc 74.25%, Val Loss 1.1694, Acc 63.90%, Weight Mag 4.170441

Training with Data Augmentation
Epoch 1: Train Loss 8.0867, Acc 51.85%, Val Loss 1.0776, Acc 50.90%, Weight Mag 4.116087
Epoch 2: Train Loss 0.7839, Acc 55.15%, Val Loss 0.9062, Acc 50.20%, Weight Mag 4.126465
Epoch 3: Train Loss 0.8064, Acc 54.40%, Val Loss 0.8151, Acc 52.60%, Weight Mag 4.130725
Epoch 4: Train Loss 0.7717, Acc 53.10%, Val Loss 1.0358, Acc 51.30%, Weight Mag 4.132299
Epoch 5: Train Loss 0.7591, Acc 55.15%, Val Loss 0.7409, Acc 54.40%, Weight Mag 4.133

## Q1

In [5]:
print("\nWeight Decay Experiment")
wd_results = train_model(BaseCatsDogsCNN, weight_decay=0.01)
print("Results:", [f"{x:.6f}" for x in wd_results[4]])

print("\nExplicit L2 Experiment")
l2_results = train_model(BaseCatsDogsCNN, use_explicit_l2=True)
print("Results:", [f"{x:.6f}" for x in l2_results[4]])


Weight Decay Experiment
Epoch 1: Train Loss 5.1082, Acc 54.95%, Val Loss 1.2448, Acc 60.40%, Weight Mag 2.748859
Epoch 2: Train Loss 0.7315, Acc 66.55%, Val Loss 0.7141, Acc 65.30%, Weight Mag 2.424294
Epoch 3: Train Loss 0.7745, Acc 66.05%, Val Loss 0.7515, Acc 62.30%, Weight Mag 2.326823
Epoch 4: Train Loss 0.6393, Acc 71.75%, Val Loss 0.8929, Acc 62.20%, Weight Mag 2.201951
Epoch 5: Train Loss 0.4873, Acc 76.10%, Val Loss 0.7805, Acc 63.70%, Weight Mag 2.241929
Results: ['2.748859', '2.424294', '2.326823', '2.201951', '2.241929']

Explicit L2 Experiment
Epoch 1: Train Loss 11.7300, Acc 53.75%, Val Loss 0.7536, Acc 55.60%, Weight Mag 2.684098
Epoch 2: Train Loss 3.4410, Acc 61.85%, Val Loss 0.9233, Acc 60.40%, Weight Mag 2.336509
Epoch 3: Train Loss 2.5465, Acc 67.35%, Val Loss 0.6620, Acc 64.30%, Weight Mag 2.102224
Epoch 4: Train Loss 2.2481, Acc 69.55%, Val Loss 0.6093, Acc 67.70%, Weight Mag 2.021691
Epoch 5: Train Loss 2.1196, Acc 69.00%, Val Loss 1.1260, Acc 56.40%, Weight Mag

## Q3

In [6]:
print("\nNo Dropout")
no_drop = train_model(BaseCatsDogsCNN, dropout_rate=0)
print("\nWith Dropout")
with_drop = train_model(BaseCatsDogsCNN, dropout_rate=0.5)
print_comparison("No Dropout", "With Dropout", no_drop, with_drop)


No Dropout
Epoch 1: Train Loss 3.1450, Acc 58.50%, Val Loss 0.7381, Acc 59.40%, Weight Mag 3.948224
Epoch 2: Train Loss 0.6349, Acc 66.00%, Val Loss 0.8315, Acc 55.40%, Weight Mag 3.970821
Epoch 3: Train Loss 0.5429, Acc 73.50%, Val Loss 0.6404, Acc 67.30%, Weight Mag 4.001426
Epoch 4: Train Loss 0.4890, Acc 76.10%, Val Loss 0.6254, Acc 68.20%, Weight Mag 4.036890
Epoch 5: Train Loss 0.4388, Acc 79.10%, Val Loss 0.7436, Acc 67.50%, Weight Mag 4.065912

With Dropout
Epoch 1: Train Loss 3.4063, Acc 49.45%, Val Loss 0.6918, Acc 50.50%, Weight Mag 4.068934
Epoch 2: Train Loss 0.7181, Acc 51.85%, Val Loss 0.6822, Acc 58.80%, Weight Mag 4.142907
Epoch 3: Train Loss 0.6998, Acc 54.05%, Val Loss 0.6814, Acc 61.10%, Weight Mag 4.193637
Epoch 4: Train Loss 0.7113, Acc 53.65%, Val Loss 0.7091, Acc 52.90%, Weight Mag 4.248659
Epoch 5: Train Loss 0.7033, Acc 53.70%, Val Loss 0.6785, Acc 58.80%, Weight Mag 4.300372

Results Comparison:
No Dropout:
Final Train Loss: 0.4388, Acc: 79.10%
Final Val Los

## Q4

In [7]:
print("\nBuilt-in Dropout")
builtin = train_model(BaseCatsDogsCNN, dropout_rate=0.5, dropout_class=nn.Dropout2d)
print("\nCustom Dropout")
custom = train_model(BaseCatsDogsCNN, dropout_rate=0.5, dropout_class=CustomDropout)
print_comparison("Built-in Dropout", "Custom Dropout", builtin, custom)


Built-in Dropout
Epoch 1: Train Loss 3.9131, Acc 49.95%, Val Loss 0.6989, Acc 51.00%, Weight Mag 4.062365
Epoch 2: Train Loss 0.7198, Acc 53.75%, Val Loss 0.6884, Acc 54.30%, Weight Mag 4.142837
Epoch 3: Train Loss 0.6988, Acc 56.80%, Val Loss 0.6781, Acc 59.40%, Weight Mag 4.182901
Epoch 4: Train Loss 0.7032, Acc 53.65%, Val Loss 0.6862, Acc 57.40%, Weight Mag 4.237594
Epoch 5: Train Loss 0.6856, Acc 54.85%, Val Loss 0.6675, Acc 60.60%, Weight Mag 4.309689

Custom Dropout
Epoch 1: Train Loss 9.2628, Acc 52.25%, Val Loss 0.7141, Acc 51.30%, Weight Mag 3.994263
Epoch 2: Train Loss 0.6697, Acc 59.40%, Val Loss 0.6562, Acc 59.20%, Weight Mag 4.013122
Epoch 3: Train Loss 0.6453, Acc 61.55%, Val Loss 0.6427, Acc 59.60%, Weight Mag 4.026767
Epoch 4: Train Loss 0.6370, Acc 64.25%, Val Loss 0.6399, Acc 60.60%, Weight Mag 4.034796
Epoch 5: Train Loss 0.6333, Acc 65.35%, Val Loss 0.6370, Acc 61.20%, Weight Mag 4.042430

Results Comparison:
Built-in Dropout:
Final Train Loss: 0.6856, Acc: 54.85%

## Q5

In [8]:
print("\nNo Early Stopping")
no_es = train_model(BaseCatsDogsCNN, dropout_rate=0.5)
print("\nWith Early Stopping")
es = train_model(BaseCatsDogsCNN, dropout_rate=0.5, patience=2, max_epochs=100)
print_comparison("No Early Stopping", "With Early Stopping", no_es, es)


No Early Stopping
Epoch 1: Train Loss 7.0145, Acc 51.35%, Val Loss 0.7034, Acc 43.70%, Weight Mag 4.034861
Epoch 2: Train Loss 0.7090, Acc 51.25%, Val Loss 0.6873, Acc 59.50%, Weight Mag 4.113165
Epoch 3: Train Loss 0.7046, Acc 52.05%, Val Loss 0.6823, Acc 60.50%, Weight Mag 4.162872
Epoch 4: Train Loss 0.6973, Acc 54.25%, Val Loss 0.6784, Acc 59.80%, Weight Mag 4.202078
Epoch 5: Train Loss 0.6961, Acc 54.00%, Val Loss 0.6890, Acc 54.60%, Weight Mag 4.246033

With Early Stopping
Epoch 1: Train Loss 5.6804, Acc 53.65%, Val Loss 0.6848, Acc 56.90%, Weight Mag 4.024686
Epoch 2: Train Loss 0.7127, Acc 53.90%, Val Loss 0.6673, Acc 59.60%, Weight Mag 4.090043
Epoch 3: Train Loss 0.6983, Acc 56.25%, Val Loss 0.6757, Acc 56.90%, Weight Mag 4.144546
Epoch 4: Train Loss 0.6950, Acc 56.90%, Val Loss 0.6750, Acc 57.80%, Weight Mag 4.195810
Early stopping at epoch 4

Results Comparison:
No Early Stopping:
Final Train Loss: 0.6961, Acc: 54.00%
Final Val Loss: 0.6890, Acc: 54.60%
With Early Stopping