# Package Imports and Data Loading

In [None]:
import numpy as np
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm

import torch
import random
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from utils import *

In [None]:
# 2. Data loading
# Build the MNIST train/test datasets under ./data (download=True) and apply
# transforms.ToTensor() to every sample.
transform = transforms.ToTensor()
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)


# Default loaders with batch size 64; only the training loader is shuffled.
# Later cells rebuild train_loader/test_loader with other batch sizes.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Sanity check: draw one training batch and print the input/label tensor shapes.
x, y = next(iter(train_loader))
print(x.shape, y.shape)

# Tile the batch into one image grid, then move axis 0 to the end before plotting
# (presumably channels-first -> channels-last for imshow — TODO confirm).
npimg = torchvision.utils.make_grid(x).numpy()
plt.imshow(np.transpose(npimg, (1, 2, 0)))
plt.axis('off')
plt.show()

# Model Definition, Training and Results

In [None]:
# Hyperparameters
learning_rate = 0.01  # passed as lr= to every optim.Adam(...) created in the cells shown here
epochs = 5  # used by the batch-size sweeps; the other train_model calls pass their own epochs (15 or 10)

# Define model, loss and optimizer

class NN(nn.Module):
    """Fully connected classifier without normalization.

    The input is flattened, mapped from 28*28 features to 256 hidden units,
    passed through ReLU, and mapped to 10 output scores.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order they are applied.
        hidden_layer = nn.Linear(28 * 28, 256)
        output_layer = nn.Linear(256, 10)
        # Sequential indices: 0 Flatten, 1 Linear, 2 ReLU, 3 Linear.
        self.net = nn.Sequential(nn.Flatten(), hidden_layer, nn.ReLU(), output_layer)

    def forward(self, x):
        """Return the 10 raw output scores for each sample in ``x``."""
        scores = self.net(x)
        return scores



class NN_BN(nn.Module):
    """Fully connected classifier with BatchNorm on the hidden layer.

    The input is flattened, mapped from 28*28 features to 256 hidden units,
    normalized with ``BatchNorm1d(256)``, passed through ReLU, and mapped to
    10 output scores.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order they are applied.
        hidden_layer = nn.Linear(28 * 28, 256)
        hidden_norm = nn.BatchNorm1d(256)
        output_layer = nn.Linear(256, 10)
        # Sequential indices: 0 Flatten, 1 Linear, 2 BatchNorm1d, 3 ReLU, 4 Linear.
        self.net = nn.Sequential(
            nn.Flatten(), hidden_layer, hidden_norm, nn.ReLU(), output_layer
        )

    def forward(self, x):
        """Return the 10 raw output scores for each sample in ``x``."""
        scores = self.net(x)
        return scores

# Loss object shared by every train_model call in the cells below.
criterion = nn.CrossEntropyLoss()

# Initial model/optimizer pair without BatchNorm.
# NOTE(review): each later training cell re-creates model and optimizer before
# use, so these initial instances look unused — confirm before removing.
model_no_bn = NN()
optimizer_1 = optim.Adam(model_no_bn.parameters(), lr=learning_rate)

# Initial model/optimizer pair with BatchNorm (same note as above).
model_bn = NN_BN()
optimizer_2 = optim.Adam(model_bn.parameters(), lr=learning_rate)


In [None]:
# Batch-size sweep for the network without BatchNorm.
batch_sizes = [8, 32, 128, 256]
loss_across_batch_no_bn = []
accuracy_across_batch_no_bn = []

for batch_size in batch_sizes:
    print(f"######## TRAINING FOR BATCH SIZE: {batch_size} ###################")

    # Start every run from a freshly initialised model and optimizer.
    model_no_bn = NN()
    optimizer_1 = optim.Adam(model_no_bn.parameters(), lr=learning_rate)

    # Rebuild both loaders so they use the batch size under test.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Train with Adam, then keep one averaged loss and one averaged accuracy
    # value for this batch size.
    loss_no_bn, acc_no_bn = train_model(
        model_no_bn, optimizer_1, criterion, train_loader, test_loader, epochs
    )
    loss_across_batch_no_bn.append(np.mean(loss_no_bn))
    accuracy_across_batch_no_bn.append(np.mean(acc_no_bn))

In [None]:
# Batch-size sweep for the network with BatchNorm.
loss_across_batch_bn = []
accuracy_across_batch_bn = []

for batch_size in batch_sizes:
    print(f"######## TRAINING FOR BATCH SIZE: {batch_size} ###################")

    # Rebuild both loaders so they use the batch size under test.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Start every run from a freshly initialised model and optimizer.
    model_bn = NN_BN()
    optimizer_2 = optim.Adam(model_bn.parameters(), lr=learning_rate)

    # Train with Adam, then keep one averaged loss and one averaged accuracy
    # value for this batch size.
    loss_bn, acc_bn = train_model(
        model_bn, optimizer_2, criterion, train_loader, test_loader, epochs
    )
    loss_across_batch_bn.append(np.mean(loss_bn))
    accuracy_across_batch_bn.append(np.mean(acc_bn))


In [None]:
# Hand the per-batch-size mean loss/accuracy of both sweeps to the comparison helper.
# NOTE(review): batch_number is the number of batch sizes tried (len(batch_sizes)),
# not the sizes themselves — confirm this is what the helper expects.
plot_compare_training_batch_curves(
    loss_bn=loss_across_batch_bn,
    loss_no_bn=loss_across_batch_no_bn,
    acc_bn=accuracy_across_batch_bn,
    acc_no_bn=accuracy_across_batch_no_bn,
    batch_number=len(batch_sizes)
)


In [None]:
# Discard the models left over from the batch-size sweeps and start from fresh weights.
# Each optimizer is rebuilt as well because it is bound to its model's parameters.
model_no_bn = NN()
optimizer_1 = optim.Adam(model_no_bn.parameters(), lr=learning_rate)

model_bn = NN_BN()
optimizer_2 = optim.Adam(model_bn.parameters(), lr=learning_rate)

In [None]:
# BatchNorm model trained with Adam for 15 epochs at batch size 128,
# with eval_status switched off.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

loss_bn_no_eval_2, acc_bn_no_eval_2 = train_model(
    model_bn,
    optimizer_2,
    criterion,
    train_loader,
    test_loader,
    epochs=15,
    eval_status=False,
)

In [None]:
# Fresh BatchNorm model and optimizer so the next run does not continue from
# the weights trained in the previous cell.
model_bn = NN_BN()
optimizer_2 = optim.Adam(model_bn.parameters(), lr=learning_rate)

In [None]:
# BatchNorm model trained with Adam for 15 epochs at batch size 128,
# leaving eval_status at train_model's default.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

loss_bn_eval_2, acc_bn_eval_2 = train_model(
    model_bn,
    optimizer_2,
    criterion,
    train_loader,
    test_loader,
    epochs=15,
)

In [None]:
# Compare the accuracy histories of the two BatchNorm runs (default eval vs eval_status=False).
# NOTE(review): epochs=15 mirrors the epochs=15 passed to both train_model calls;
# presumably the three values must stay in sync — confirm against the helper.
plot_compare_training_eval_curves(
    acc_eval=acc_bn_eval_2,
    acc_no_eval=acc_bn_no_eval_2,
    epochs=15
)

In [None]:
# Fresh model/optimizer pairs for the 10-epoch runs that feed the dead-neuron
# and weight-visualisation cells below.
model_no_bn = NN()
optimizer_1 = optim.Adam(model_no_bn.parameters(), lr=learning_rate)

model_bn = NN_BN()
optimizer_2 = optim.Adam(model_bn.parameters(), lr=learning_rate)

In [None]:
# Train both architectures for 10 epochs at batch size 64. The returned
# histories are discarded; only the trained models are used afterwards.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

_, _ = train_model(
    model_no_bn, optimizer_1, criterion, train_loader, test_loader, epochs=10
)

_, _ = train_model(
    model_bn, optimizer_2, criterion, train_loader, test_loader, epochs=10
)

In [None]:
# Collect dead-neuron statistics for both trained models.

# Both models are probed with the same train_loader.
acts_no_bn = get_relu_activations(model_no_bn, train_loader)
acts_bn = get_relu_activations(model_bn, train_loader)

# count_dead_neurons returns a pair per model; judging by the names these are a
# dead-neuron count and an average fraction of zero activations — TODO confirm in utils.
dead_no_bn, avg_zero_no_bn = count_dead_neurons(acts_no_bn)
dead_bn, avg_zero_bn = count_dead_neurons(acts_bn)

In [None]:
# Pass the dead-neuron statistics of both models (no-BatchNorm first, BatchNorm second) to the plotting helper.
dead_neuron_plot(dead_no_bn, avg_zero_no_bn, dead_bn, avg_zero_bn)

In [None]:
# Visualize linear-layer weights for the model without BatchNorm.
# input_shape=(1, 28, 28) has 1*28*28 = 784 elements, matching the first Linear layer's input size.
visualize_linear1_weights_2d(model_no_bn, input_shape=(1, 28, 28))

In [None]:
# Visualize linear-layer weights for the model with BatchNorm.
# input_shape=(1, 28, 28) has 1*28*28 = 784 elements, matching the first Linear layer's input size.
visualize_linear1_weights_2d(model_bn, input_shape=(1, 28, 28))