In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet18
import numpy as np

def register_hooks(model, activation_dict):
    """Attach forward hooks that record layer outputs into ``activation_dict``.

    A hook is registered on every submodule of ``model`` that is an instance
    of ``nn.Conv2d``, ``nn.BatchNorm2d``, ``nn.ReLU``, ``nn.MaxPool2d``,
    ``nn.AdaptiveAvgPool2d`` or ``nn.Linear``.

    Args:
        model: Module whose submodules (as yielded by ``named_modules()``)
            are inspected.
        activation_dict: Dict filled in place on every forward pass; keys are
            the ``named_modules()`` names, values are detached copies of the
            module outputs. If the same module runs more than once in a
            forward pass, the last output overwrites the earlier ones.

    Returns:
        A list of hook handles; call ``.remove()`` on each to detach the hooks.
    """
    hooked_types = (
        nn.Conv2d,
        nn.BatchNorm2d,
        nn.ReLU,
        nn.MaxPool2d,
        nn.AdaptiveAvgPool2d,
        nn.Linear,
    )

    def get_activation(name):
        def hook(module, inputs, output):
            # clone() is required: detach() alone shares storage with the live
            # output, so a following in-place op (e.g. nn.ReLU(inplace=True))
            # would silently overwrite the recorded activation.
            activation_dict[name] = output.detach().clone()
        return hook

    return [
        layer.register_forward_hook(get_activation(name))
        for name, layer in model.named_modules()
        if isinstance(layer, hooked_types)
    ]

def train_model(model, trainloader, criterion, optimizer, epochs=10):
    """Train ``model`` in place and print the running loss.

    The model is moved to CUDA when available, otherwise to the CPU, and is
    put into training mode before the first batch.

    Args:
        model: Module to optimize.
        trainloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: Optimizer built over ``model``'s parameters.
        epochs: Number of full passes over ``trainloader``.

    Returns:
        None. The average loss of each group of 100 mini-batches is printed;
        a trailing group shorter than 100 batches is not reported.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    # Make sure BatchNorm/Dropout layers behave as training layers even if
    # the caller left the model in eval mode.
    model.train()

    for epoch in range(epochs):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(trainloader):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if i % 100 == 99:  # Print every 100 mini-batches
                print(f'[Epoch: {epoch + 1}, Mini-batch: {i + 1}] loss: {running_loss / 100:.3f}')
                running_loss = 0.0
    print('Finished Training')

def compare_models(model1, model2, input_images):
    """Print per-layer activation differences between two models on one batch.

    Both models are moved to CUDA when available (otherwise the CPU) and run
    on ``input_images`` with gradients disabled. The forward passes run in
    eval mode so BatchNorm uses its running statistics and does not update
    them; each model's previous train/eval mode is restored afterwards.

    For every layer recorded by ``register_hooks`` on ``model1`` the
    difference ``activation1 - activation2`` is printed together with its
    Euclidean norm: one norm per (batch, channel) slice for 4D outputs and a
    single norm for 2D outputs. Outputs of any other rank only get their size
    printed. A summary of all norms is printed at the end.

    Args:
        model1: First model.
        model2: Second model; layers are matched to ``model1`` by name.
        input_images: Input batch fed to both models.

    Returns:
        None; all results are printed.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model1, model2 = model1.to(device), model2.to(device)
    input_images = input_images.to(device)

    activation1, activation2 = {}, {}
    was_training1, was_training2 = model1.training, model2.training
    hooks = []
    try:
        hooks.extend(register_hooks(model1, activation1))
        hooks.extend(register_hooks(model2, activation2))
        model1.eval()
        model2.eval()
        with torch.no_grad():
            model1(input_images)
            model2(input_images)
    finally:
        # Always detach the hooks and restore the modes, even if a forward
        # pass raised, so the models are left as the caller handed them in.
        for hook in hooks:
            hook.remove()
        model1.train(was_training1)
        model2.train(was_training2)

    # Entries are (layer_name, batch, channel, norm); batch and channel are
    # None for 2D outputs, which have a single norm for the whole tensor.
    norms = []
    for layer_name, act1 in activation1.items():
        print(f'{layer_name} - output size: {act1.size()}')
        act2 = activation2.get(layer_name)
        if act2 is None or act2.shape != act1.shape:
            print(f'Skipping {layer_name}: no matching activation in the second model')
            continue
        diff = (act1 - act2).cpu().numpy()

        if diff.ndim == 4:  # For 4D tensors
            batch_size, num_channels = diff.shape[:2]
            for b in range(batch_size):
                for c in range(num_channels):
                    print(f'Batch {b}, Channel {c}, Values:\n', diff[b, c, :, :])
                    norm = np.sqrt(np.sum(diff[b, c, :, :] ** 2))
                    norms.append((layer_name, b, c, norm))
                    print(f'Euclidean norm for Batch {b}, Channel {c}: {norm}')
        elif diff.ndim == 2:  # For 2D tensors
            print('Values:\n', diff)
            norm = np.sqrt(np.sum(diff ** 2))
            norms.append((layer_name, None, None, norm))
            print(f'Euclidean norm: {norm}')

    # Print all norms at the end
    print("\nSummary of Euclidean norms:")
    for layer_name, b, c, norm in norms:
        if b is None:
            print(f'Layer: {layer_name}, Euclidean norm: {norm}')
        else:
            print(f'Layer: {layer_name}, Batch {b}, Channel {c}, Euclidean norm: {norm}')

# Define transformations for the training and test data.
# Training uses random crop + horizontal flip augmentation; both pipelines
# apply the same per-channel normalization.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

# Load the training and test datasets
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False, num_workers=2)

# Build two untrained ResNet18 models with a 10-class head.
# resnet18 loads no pretrained weights unless asked to, so the deprecated
# `pretrained=False` argument is simply omitted.
model1 = resnet18(num_classes=10)
model2 = resnet18(num_classes=10)

# Define loss function and optimizers with different learning rates
criterion = nn.CrossEntropyLoss()
optimizer1 = optim.SGD(model1.parameters(), lr=0.001, momentum=0.9)
optimizer2 = optim.SGD(model2.parameters(), lr=0.01, momentum=0.9)

# Train both models
print("Training model1 with lr=0.001")
train_model(model1, trainloader, criterion, optimizer1, epochs=1)

print("Training model2 with lr=0.01")
train_model(model2, trainloader, criterion, optimizer2, epochs=1)

# Take the first batch of the test set (testloader is not shuffled).
# The builtin next() works on every PyTorch version; the iterator's
# .next() method was removed in newer releases.
dataiter = iter(testloader)
images, labels = next(dataiter)
# Two images keep the per-channel printout manageable and avoid BatchNorm's
# single-sample error should the models still be in training mode.
images = images[:2]

# Compare the models
compare_models(model1, model2, images)


Files already downloaded and verified
Files already downloaded and verified
Training model1 with lr=0.001
[Epoch: 1, Mini-batch: 100] loss: 2.235
[Epoch: 1, Mini-batch: 200] loss: 1.975
[Epoch: 1, Mini-batch: 300] loss: 1.885
[Epoch: 1, Mini-batch: 400] loss: 1.821
[Epoch: 1, Mini-batch: 500] loss: 1.786
[Epoch: 1, Mini-batch: 600] loss: 1.720
[Epoch: 1, Mini-batch: 700] loss: 1.682
[Epoch: 1, Mini-batch: 800] loss: 1.658
[Epoch: 1, Mini-batch: 900] loss: 1.640
[Epoch: 1, Mini-batch: 1000] loss: 1.627
[Epoch: 1, Mini-batch: 1100] loss: 1.575
[Epoch: 1, Mini-batch: 1200] loss: 1.554
[Epoch: 1, Mini-batch: 1300] loss: 1.582
[Epoch: 1, Mini-batch: 1400] loss: 1.559
[Epoch: 1, Mini-batch: 1500] loss: 1.525
Finished Training
Training model2 with lr=0.01
[Epoch: 1, Mini-batch: 100] loss: 2.485
[Epoch: 1, Mini-batch: 200] loss: 2.206
[Epoch: 1, Mini-batch: 300] loss: 2.102
[Epoch: 1, Mini-batch: 400] loss: 1.888
[Epoch: 1, Mini-batch: 500] loss: 1.966
[Epoch: 1, Mini-batch: 600] loss: 1.901
[