In [41]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt


In [42]:
class LeNet_BatchNorm(nn.Module):
    def __init__(self):
        
        super().__init__()
        self.convlayers = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.BatchNorm2d(6),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.BatchNorm2d(16),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.BatchNorm2d(120),
        )
        self.flatten = nn.Flatten()
        self.fc = nn.Sequential(nn.Linear(120, 84), nn.Tanh(), nn.BatchNorm1d(84), nn.Linear(84, 10), nn.Softmax(1))

    def forward(self, x):
        x = self.convlayers(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x


In [43]:
class LeNet_InputBatchNorm(nn.Module):
    def __init__(self):
        
        super().__init__()
        self.convlayers = nn.Sequential(
            nn.BatchNorm2d(1),
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.BatchNorm2d(6),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.BatchNorm2d(16),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.BatchNorm2d(120),
        )
        self.flatten = nn.Flatten()
        self.fc = nn.Sequential(nn.Linear(120, 84), nn.Tanh(), nn.BatchNorm1d(84), nn.Linear(84, 10), nn.Softmax(1))

    def forward(self, x):
        x = self.convlayers(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x


In [44]:
class LeNet_Dropout(nn.Module):
    def __init__(self):
        
        super().__init__()
        self.convlayers = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.Dropout(p=0.5),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.Dropout(p=0.5),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.Dropout(p=0.5),
        )
        self.flatten = nn.Flatten()
        self.fc = nn.Sequential(nn.Linear(120, 84), nn.Tanh(), nn.Dropout(p=0.5), nn.Linear(84, 10), nn.Softmax(1))

    def forward(self, x):
        x = self.convlayers(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x


In [45]:
class LeNet_DropoutBatchNorm(nn.Module):
    def __init__(self):
        
        super().__init__()
        self.convlayers = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.BatchNorm2d(6),
            nn.Dropout(p=0.5),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.BatchNorm2d(16),
            nn.Dropout(p=0.5),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.BatchNorm2d(120),
            nn.Dropout(p=0.5),
        )
        self.flatten = nn.Flatten()
        self.fc = nn.Sequential(nn.Linear(120, 84), nn.Tanh(), nn.BatchNorm2d(84),nn.Dropout(p=0.5), nn.Linear(84, 10), nn.Softmax(1))

    def forward(self, x):
        x = self.convlayers(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x


In [46]:
def train(model, dataloader, optimizer, device="cpu"):

    # store loss & accuracy for each epoch
    total_loss = 0.0
    total = 0.0
    correct = 0.0

    model.train()
    for images, labels in dataloader:

        # send stuff to device
        images, labels = images.to(device), labels.to(device)

        # forward pass
        preds = model(images)

        # compute loss
        loss = loss_fn(preds, labels)

        # gradient descent
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # loss and accuracy calculations
        total_loss += preds.shape[0] * loss.item()
        _, predicted = torch.max(preds.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_loss = total_loss / total
    train_accuracy = correct / total

    print(f"[Training] Loss: {train_loss}, Accuracy: {train_accuracy}")

    return train_loss, train_accuracy


In [47]:
def validate(model, dataloader, device="cpu"):

    # store loss & accuracy for each epoch
    total_loss = 0.0
    total = 0.0
    correct = 0.0

    model.eval()
    with torch.no_grad():
        for images, labels in dataloader:

            # send stuff to device
            images, labels = images.to(device), labels.to(device)

            # forward pass
            preds = model(images)

            # compute loss
            loss = loss_fn(preds, labels)

            # loss and accuracy calculations
            total_loss += preds.shape[0] * loss.item()
            _, predicted = torch.max(preds.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss = total_loss / total
    val_accuracy = correct / total

    print(f"[Validation] Loss: {val_loss}, Accuracy: {val_accuracy}")

    return val_loss, val_accuracy


In [48]:
batch_size = 32
device = "cuda" if torch.cuda.is_available() else "cpu"
epochs = 12
loss_fn = nn.CrossEntropyLoss()

In [49]:
# define appropriate transforms for each part
p2_transformations = transforms.Compose([transforms.Resize((32, 32)), transforms.ToTensor(), transforms.Normalize(mean=(0.1307,), std=(0.3081,))])
p3_transformations = transforms.Compose([transforms.Resize((32, 32)), transforms.ToTensor()])
p4_transformations = transforms.Compose([transforms.Resize((32, 32)), transforms.ToTensor(), transforms.Normalize(mean=(0.1307,), std=(0.3081,))])
p5_transformations = transforms.Compose([transforms.Resize((32, 32)), transforms.ToTensor(), transforms.Normalize(mean=(0.1307,), std=(0.3081,))])


#  Part 2

In [50]:
train_dataset = datasets.MNIST(root="MNIST", train=True, transform=p2_transformations, download=True)
val_dataset = datasets.MNIST(root="MNIST", train=False, transform=p2_transformations, download=True)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=True, num_workers=2)


In [54]:
p2_train_loss = []
p2_validation_loss = []
p2_train_accuracy = []
p2_validation_accuracy = []

model = LeNet_BatchNorm().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.001)


for epoch in range(epochs):

    # Validation Phase
    print(f"[Validation] Epoch: {epoch}")
    val_loss, val_acc = validate(model, val_loader, device)

    # Training Phase
    print(f"[Training] Epoch: {epoch}")
    train_loss, train_acc = train(model, train_loader, optimizer, device)

    p2_train_loss.append(train_loss)
    p2_validation_loss.append(val_loss)
    p2_train_accuracy.append(train_acc)
    p2_validation_accuracy.append(val_acc)


[Validation] Epoch: 0
[Validation] Loss: 2.302244538497925, Accuracy: 0.1176
[Training] Epoch: 0
[Training] Loss: 1.5185882519404093, Accuracy: 0.9534
[Validation] Epoch: 1
[Validation] Loss: 1.4845309381484986, Accuracy: 0.9781
[Training] Epoch: 1
[Training] Loss: 1.486625106048584, Accuracy: 0.9760333333333333
[Validation] Epoch: 2
[Validation] Loss: 1.4805363555908204, Accuracy: 0.9811
[Training] Epoch: 2
[Training] Loss: 1.481323575592041, Accuracy: 0.9806333333333334
[Validation] Epoch: 3
[Validation] Loss: 1.4793711917877197, Accuracy: 0.9821
[Training] Epoch: 3


KeyboardInterrupt: 

In [None]:
print(model)

In [None]:
bn_weights = [
    model.convlayers[2].weight.cpu().detach().numpy(),
    model.convlayers[6].weight.cpu().detach().numpy(),
    model.convlayers[10].weight.cpu().detach().numpy(),
    model.fc[2].weight.cpu().detach().numpy(),
]

bn_bias = [
    model.convlayers[2].bias.cpu().detach().numpy(),
    model.convlayers[6].bias.cpu().detach().numpy(),
    model.convlayers[10].bias.cpu().detach().numpy(),
    model.fc[2].bias.cpu().detach().numpy(),
]

In [None]:
# violin plot for weights & biases
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
gamma = ax.violinplot(bn_weights)

plt.title("Violin Plot for Gamma")
plt.xlabel("Batchnorm Layer Number")
plt.show()

In [None]:
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
beta = ax.violinplot(bn_bias)
plt.title("Violin Plot for Beta")
plt.xlabel("Batchnorm Layer Number")
plt.show()

# Part 3

In [None]:
train_dataset = datasets.MNIST(root="MNIST", train=True, transform=p3_transformations, download=True)
val_dataset = datasets.MNIST(root="MNIST", train=False, transform=p3_transformations, download=True)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=True, num_workers=2)


In [None]:
p3_train_loss = []
p3_validation_loss = []
p3_train_accuracy = []
p3_validation_accuracy = []

model = LeNet_InputBatchNorm().to(device)
optimizer = optim.SGD(model.parameters())


for epoch in range(epochs):

    # Training Phase
    print(f"[Training] Epoch: {epoch}")
    train_loss, train_acc = train(model, train_loader, optimizer, device)

    # Validation Phase
    print(f"[Validation] Epoch: {epoch}")
    val_loss, val_acc = validate(model, val_loader, device)

    p3_train_loss.append(train_loss)
    p3_validation_loss.append(val_loss)
    p3_train_accuracy.append(train_acc)
    p3_validation_accuracy.append(val_acc)


In [None]:
print(model)

In [None]:
bn_weights = [
    model.convlayers[0].weight.cpu().detach().numpy(),
    model.convlayers[3].weight.cpu().detach().numpy(),
    model.convlayers[7].weight.cpu().detach().numpy(),
    model.convlayers[11].weight.cpu().detach().numpy(),
    model.fc[2].weight.cpu().detach().numpy(),
]

bn_bias = [
    model.convlayers[0].weight.cpu().detach().numpy(),
    model.convlayers[3].bias.cpu().detach().numpy(),
    model.convlayers[7].bias.cpu().detach().numpy(),
    model.convlayers[11].bias.cpu().detach().numpy(),
    model.fc[2].bias.cpu().detach().numpy(),
]

In [None]:
# violin plot for weights & biases
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
gamma = ax.violinplot(bn_weights)

plt.title("Violin Plot for Gamma")
plt.xlabel("Batchnorm Layer Number")
plt.show()

In [None]:
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
beta = ax.violinplot(bn_bias)
plt.title("Violin Plot for Beta")
plt.xlabel("Batchnorm Layer Number")
plt.show()

In [None]:
plt.title("Input Standard Norm vs Input Batch Norm")
plt.ylabel("Training Accuracy")
plt.xlabel("Epochs")

plt.plot(range(1, epochs+1), p2_train_accuracy, label="Input Standard Norm")
plt.plot(range(1, epochs+1), p3_train_accuracy, label="Input Batch Norm")
plt.legend()

In [None]:
plt.title("Input Batch Norm vs Input Standard Norm")
plt.ylabel("Validation Accuracy")
plt.xlabel("Epochs")

plt.plot(range(1, epochs+1), p2_validation_accuracy, label="Input Standard Norm")
plt.plot(range(1, epochs+1), p3_validation_accuracy, label="Input Batch Norm")

plt.legend()

# Part 4

In [None]:
train_dataset = datasets.MNIST(root="MNIST", train=True, transform=p4_transformations, download=True)
val_dataset = datasets.MNIST(root="MNIST", train=False, transform=p4_transformations, download=True)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=True, num_workers=2)


In [None]:
p4_train_loss = []
p4_validation_loss = []
p4_train_accuracy = []
p4_validation_accuracy = []

model = LeNet_Dropout().to(device)
optimizer = optim.SGD(model.parameters())


for epoch in range(epochs):

    # Training Phase
    print(f"[Training] Epoch: {epoch}")
    train_loss, train_acc = train(model, train_loader, optimizer, device)

    # Validation Phase
    print(f"[Validation] Epoch: {epoch}")
    val_loss, val_acc = validate(model, val_loader, device)

    p4_train_loss.append(train_loss)
    p4_validation_loss.append(val_loss)
    p4_train_accuracy.append(train_acc)
    p4_validation_accuracy.append(val_acc)


In [None]:
print(model)

In [None]:
plt.title("Input Batch Norm vs Input Standard Norm")
plt.ylabel("Validation Accuracy")
plt.xlabel("Epochs")

plt.plot(range(1, epochs+1), p2_validation_accuracy, label="Input Standard Norm")
plt.plot(range(1, epochs+1), p3_validation_accuracy, label="Input Batch Norm")
plt.plot(range(1, epochs+1), p4_validation_accuracy, label="Dropout")

plt.legend()

# Part 5

In [None]:
train_dataset = datasets.MNIST(root="MNIST", train=True, transform=p5_transformations, download=True)
val_dataset = datasets.MNIST(root="MNIST", train=False, transform=p5_transformations, download=True)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=True, num_workers=2)


In [None]:
p5_train_loss = []
p5_validation_loss = []
p5_train_accuracy = []
p5_validation_accuracy = []

model = LeNet_DropoutBatchNorm().to(device)
optimizer = optim.SGD(model.parameters())


for epoch in range(epochs):

    # Training Phase
    print(f"[Training] Epoch: {epoch}")
    train_loss, train_acc = train(model, train_loader, optimizer, device)

    # Validation Phase
    print(f"[Validation] Epoch: {epoch}")
    val_loss, val_acc = validate(model, val_loader, device)

    p5_train_loss.append(train_loss)
    p5_validation_loss.append(val_loss)
    p5_train_accuracy.append(train_acc)
    p5_validation_accuracy.append(val_acc)


In [None]:
print(model)

In [None]:
plt.title("Input Batch Norm vs Input Standard Norm")
plt.ylabel("Validation Accuracy")
plt.xlabel("Epochs")

plt.plot(range(1,  epochs+1), p2_validation_accuracy, label="Batch Norm")
plt.plot(range(1,  epochs+1), p4_validation_accuracy, label="Dropout")
plt.plot(range(1,  epochs+1), p5_validation_accuracy, label="Dropout + Batch Norm")

plt.legend()