In [None]:
import torch
import torchvision.transforms as T
from PIL import Image
import os
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.distributions as dist

preprocess = T.Compose([
    T.Resize((128, 128)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

class MyDataset(Dataset):
    def __init__(self, transform=None, split="train"):
        self.imgs_path = os.path.join("../data/cats_and_dogs_filtered", split)
        self.data = [
            [os.path.join(class_path, img), class_name]
            for class_name in os.listdir(self.imgs_path)
            if os.path.isdir(class_path := os.path.join(self.imgs_path, class_name))
            for img in os.listdir(class_path) if img.endswith('.jpg')
        ]
        self.class_map = {"dogs": 0, "cats": 1}
        self.transform = transform

    def __len__(self): return len(self.data)
    def __getitem__(self, index):
        img_path, class_name = self.data[index]
        img = Image.open(img_path).convert('RGB')
        class_id = torch.tensor(self.class_map[class_name])
        return self.transform(img) if self.transform else img, class_id

class CustomDropout(nn.Module):
    def __init__(self, p=0.5): 
        super().__init__()
        self.p = p
    def forward(self, x):
        if self.training:
            mask = dist.Bernoulli(probs=1-self.p).sample(x.size()).to(x.device)
            return x * mask / (1-self.p)
        return x

def get_weight_magnitude(model):
    total_magnitude = sum(torch.norm(p, p=2).item() for p in model.parameters() if p.requires_grad)
    num_params = sum(1 for _ in model.parameters() if _.requires_grad)
    return total_magnitude / num_params if num_params > 0 else 0

In [2]:
class BaseCatsDogsCNN(nn.Module):
    def __init__(self, dropout_rate=0, dropout_class=nn.Dropout2d):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1), nn.BatchNorm2d(16), nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=3, padding=1), nn.BatchNorm2d(32), nn.ReLU(),
            nn.MaxPool2d(2), dropout_class(dropout_rate) if dropout_rate > 0 else nn.Identity(),
            nn.Conv2d(32, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(),
            nn.MaxPool2d(2), dropout_class(dropout_rate) if dropout_rate > 0 else nn.Identity(),
            nn.Flatten(), nn.Linear(64 * 32 * 32, 512), nn.ReLU(),
            nn.Dropout(dropout_rate) if dropout_rate > 0 else nn.Identity(),
            nn.Linear(512, 2)
        )
    def forward(self, x): return self.net(x)

def train_epoch(model, dataloader, criterion, optimizer, device, use_explicit_l2=False, lambda_l2=0.01):
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        if use_explicit_l2:
            l2_reg = sum(torch.norm(p, p=2) ** 2 for p in model.parameters() if p.requires_grad)
            loss += lambda_l2 * l2_reg
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    return running_loss / len(dataloader), 100 * correct / total

def evaluate_model(model, dataloader, criterion, device):
    model.eval()
    running_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return running_loss / len(dataloader), 100 * correct / total

def train_model(model_class, dropout_rate=0, dropout_class=nn.Dropout2d, weight_decay=0, 
                use_explicit_l2=False, patience=None, max_epochs=5):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    train_loader = DataLoader(MyDataset(preprocess, "train"), batch_size=32, shuffle=True)
    val_loader = DataLoader(MyDataset(preprocess, "validation"), batch_size=32, shuffle=False)
    
    model = model_class(dropout_rate, dropout_class).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=weight_decay)
    
    train_losses, train_accs, val_losses, val_accs, weight_mags = [], [], [], [], []
    best_val_loss, epochs_no_improve, best_model_state = float('inf'), 0, None
    
    for epoch in range(max_epochs):
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device, use_explicit_l2)
        val_loss, val_acc = evaluate_model(model, val_loader, criterion, device)
        weight_mag = get_weight_magnitude(model)
        
        train_losses.append(train_loss); train_accs.append(train_acc)
        val_losses.append(val_loss); val_accs.append(val_acc)
        weight_mags.append(weight_mag)
        
        print(f"Epoch {epoch+1}: Train Loss {train_loss:.4f}, Acc {train_acc:.2f}%, "
              f"Val Loss {val_loss:.4f}, Acc {val_acc:.2f}%, Weight Mag {weight_mag:.6f}")
        
        if patience:
            if val_loss < best_val_loss:
                best_val_loss, epochs_no_improve, best_model_state = val_loss, 0, model.state_dict()
            else:
                epochs_no_improve += 1
                if epochs_no_improve >= patience:
                    print(f"Early stopping at epoch {epoch+1}")
                    model.load_state_dict(best_model_state)
                    break
    
    return train_losses, train_accs, val_losses, val_accs, weight_mags

In [3]:
print("Weight Decay Experiment")
wd_results = train_model(BaseCatsDogsCNN, weight_decay=0.01)

print("\nExplicit L2 Experiment")
l2_results = train_model(BaseCatsDogsCNN, use_explicit_l2=True)

print("\nResults:")
print("Weight Decay:", [f"{x:.6f}" for x in wd_results[4]])
print("Explicit L2:", [f"{x:.6f}" for x in l2_results[4]])

Weight Decay Experiment
Epoch 1: Train Loss 3.0854, Acc 59.25%, Val Loss 0.8838, Acc 63.90%, Weight Mag 2.711104
Epoch 2: Train Loss 0.8403, Acc 67.75%, Val Loss 1.0829, Acc 63.40%, Weight Mag 2.437612
Epoch 3: Train Loss 0.5821, Acc 74.20%, Val Loss 0.6757, Acc 67.40%, Weight Mag 2.272397
Epoch 4: Train Loss 0.4020, Acc 81.50%, Val Loss 0.6950, Acc 65.30%, Weight Mag 2.150402
Epoch 5: Train Loss 0.3642, Acc 84.35%, Val Loss 0.6440, Acc 69.40%, Weight Mag 2.084958

Explicit L2 Experiment
Epoch 1: Train Loss 9.8884, Acc 54.60%, Val Loss 0.6883, Acc 59.40%, Weight Mag 2.731243
Epoch 2: Train Loss 3.4253, Acc 61.55%, Val Loss 0.6122, Acc 66.60%, Weight Mag 2.267671
Epoch 3: Train Loss 2.4797, Acc 65.70%, Val Loss 0.6789, Acc 62.60%, Weight Mag 2.084514
Epoch 4: Train Loss 2.2582, Acc 69.20%, Val Loss 0.5999, Acc 68.00%, Weight Mag 1.997243
Epoch 5: Train Loss 2.0612, Acc 70.85%, Val Loss 0.6370, Acc 67.00%, Weight Mag 1.919192

Results:
Weight Decay: ['2.711104', '2.437612', '2.272397', '

In [4]:
def print_comparison(name1, name2, res1, res2):
    print(f"\nResults Comparison:")
    for name, res in [(name1, res1), (name2, res2)]:
        print(f"{name}:")
        print(f"Final Train Loss: {res[0][-1]:.4f}, Acc: {res[1][-1]:.2f}%")
        print(f"Final Val Loss: {res[2][-1]:.4f}, Acc: {res[3][-1]:.2f}%")
    gap1, gap2 = res1[1][-1] - res1[3][-1], res2[1][-1] - res2[3][-1]
    print(f"\nOverfitting: {name1} Gap: {gap1:.2f}%, {name2} Gap: {gap2:.2f}%")

print("No Dropout")
no_drop = train_model(BaseCatsDogsCNN, dropout_rate=0)
print("\nWith Dropout")
with_drop = train_model(BaseCatsDogsCNN, dropout_rate=0.5)
print_comparison("No Dropout", "With Dropout", no_drop, with_drop)

print("\nBuilt-in Dropout")
builtin = train_model(BaseCatsDogsCNN, dropout_rate=0.5, dropout_class=nn.Dropout2d)
print("\nCustom Dropout")
custom = train_model(BaseCatsDogsCNN, dropout_rate=0.5, dropout_class=CustomDropout)
print_comparison("Built-in Dropout", "Custom Dropout", builtin, custom)

No Dropout
Epoch 1: Train Loss 4.3943, Acc 58.90%, Val Loss 0.9618, Acc 62.20%, Weight Mag 3.869785
Epoch 2: Train Loss 0.9721, Acc 62.00%, Val Loss 0.6999, Acc 64.00%, Weight Mag 3.899338
Epoch 3: Train Loss 0.5277, Acc 73.45%, Val Loss 0.6379, Acc 68.70%, Weight Mag 3.922705
Epoch 4: Train Loss 0.4800, Acc 76.30%, Val Loss 0.6910, Acc 64.80%, Weight Mag 3.939906
Epoch 5: Train Loss 0.4727, Acc 76.55%, Val Loss 0.6892, Acc 65.00%, Weight Mag 3.967265

With Dropout
Epoch 1: Train Loss 4.0132, Acc 51.60%, Val Loss 0.6920, Acc 53.30%, Weight Mag 3.994448
Epoch 2: Train Loss 0.6997, Acc 54.55%, Val Loss 0.6753, Acc 57.10%, Weight Mag 4.066553
Epoch 3: Train Loss 0.7105, Acc 54.55%, Val Loss 0.6626, Acc 60.90%, Weight Mag 4.137199
Epoch 4: Train Loss 0.6774, Acc 57.35%, Val Loss 0.6620, Acc 60.40%, Weight Mag 4.165039
Epoch 5: Train Loss 0.6735, Acc 58.90%, Val Loss 0.6512, Acc 61.00%, Weight Mag 4.203224

Results Comparison:
No Dropout:
Final Train Loss: 0.4727, Acc: 76.55%
Final Val Loss

In [5]:
print("No Early Stopping")
no_es = train_model(BaseCatsDogsCNN, dropout_rate=0.5)
print("\nWith Early Stopping")
es = train_model(BaseCatsDogsCNN, dropout_rate=0.5, patience=2, max_epochs=100)
print_comparison("No Early Stopping", "With Early Stopping", no_es, es)

No Early Stopping
Epoch 1: Train Loss 4.2190, Acc 51.65%, Val Loss 0.6971, Acc 51.80%, Weight Mag 3.998729
Epoch 2: Train Loss 0.7384, Acc 51.85%, Val Loss 0.6893, Acc 53.00%, Weight Mag 4.089215
Epoch 3: Train Loss 0.7046, Acc 51.10%, Val Loss 0.6862, Acc 58.20%, Weight Mag 4.160253
Epoch 4: Train Loss 0.6960, Acc 53.00%, Val Loss 0.6801, Acc 51.70%, Weight Mag 4.204425
Epoch 5: Train Loss 0.7005, Acc 53.65%, Val Loss 0.6860, Acc 53.20%, Weight Mag 4.238151

With Early Stopping
Epoch 1: Train Loss 4.6461, Acc 54.70%, Val Loss 0.6942, Acc 46.30%, Weight Mag 4.014052
Epoch 2: Train Loss 0.7065, Acc 51.40%, Val Loss 0.6823, Acc 58.90%, Weight Mag 4.067505
Epoch 3: Train Loss 0.6955, Acc 57.25%, Val Loss 0.6805, Acc 60.00%, Weight Mag 4.108136
Epoch 4: Train Loss 0.6934, Acc 54.55%, Val Loss 0.6857, Acc 60.30%, Weight Mag 4.145878
Epoch 5: Train Loss 0.6956, Acc 56.00%, Val Loss 0.6852, Acc 58.30%, Weight Mag 4.187772
Early stopping at epoch 5

Results Comparison:
No Early Stopping:
Final