In [1]:
import os

import numpy as np
import torch
import torchvision
import torchvision.datasets
from sklearn import linear_model
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import Compose
from torchvision.transforms import ToTensor



In [2]:
def XavierInitialisation(network):
    """Apply Xavier (Glorot) uniform initialisation to Linear and Conv2d layers.

    Every sub-module of ``network`` whose class name is exactly ``"Linear"`` or
    ``"Conv2d"`` has its weight re-drawn in place with
    ``nn.init.xavier_uniform_`` and its bias, when it has one, set to zero.
    All other modules are left untouched.

    Args:
        network: the ``nn.Module`` to initialise in place.
    """
    for layer in network.modules():
        if layer.__class__.__name__ not in ("Linear", "Conv2d"):
            continue
        with torch.no_grad():
            nn.init.xavier_uniform_(layer.weight)
            # Layers constructed with bias=False expose ``bias`` as None.
            if layer.bias is not None:
                layer.bias.zero_()

def XavierInitialisationNormal(network):
    """Apply Xavier (Glorot) normal initialisation to Linear and Conv2d layers.

    Every sub-module of ``network`` whose class name is exactly ``"Linear"`` or
    ``"Conv2d"`` has its weight re-drawn in place with
    ``nn.init.xavier_normal_`` and its bias, when it has one, set to zero.
    All other modules are left untouched.

    Args:
        network: the ``nn.Module`` to initialise in place.
    """
    for layer in network.modules():
        if layer.__class__.__name__ not in ("Linear", "Conv2d"):
            continue
        with torch.no_grad():
            nn.init.xavier_normal_(layer.weight)
            # Layers constructed with bias=False expose ``bias`` as None.
            if layer.bias is not None:
                layer.bias.zero_()

# Per-activation coefficients used to keep the propagated variance constant.
# Keys are activation classes; entries that are commented out are currently
# disabled.
normalising_coeff_dict = dict([
    # (nn.Sigmoid, 1.92),
    (nn.Tanh, 1.58),
    (nn.ReLU, 1.41),
    # (nn.Softsign, 2.317374),
    # (nn.ELU, 1.242043),
])

# Live view over the coefficient values above.
normalising_coeff_arr = normalising_coeff_dict.values()

# Activation classes that ConstVarInitialisation recognises in front of a
# Linear layer.
activation_functions = [nn.ReLU, nn.Tanh, nn.Sigmoid, nn.SELU, nn.GELU]

def ConstVarInitialisation(network, normalising_coefficient=None, normal=False):
    """Initialise layer weights with a scale of ``coefficient / sqrt(fan_in)``.

    ``fan_in`` is ``weight.shape[1]`` multiplied by the receptive field size
    (the product of ``weight.shape[2:]``, which is 1 for a 2-D weight). The
    bias of every initialised layer, when it has one, is set to zero.

    Layers that are initialised:
        * ``Linear`` layers whose immediately preceding module, in
          ``network.modules()`` order, is one of ``activation_functions``.
        * ``Conv2d`` layers, but only when ``normalising_coefficient`` is given.

    NOTE(review): a Linear layer that directly follows a module such as
    Flatten or Dropout is skipped even when a coefficient is supplied. This
    keeps the original selection rule; confirm that it is intended.
    NOTE(review): U(-scale, scale) has a standard deviation of scale/sqrt(3),
    so ``normal=False`` and ``normal=True`` do not give the same weight
    variance; confirm which one is meant.

    Args:
        network: the ``nn.Module`` to initialise in place.
        normalising_coefficient: coefficient used for every selected layer.
            When None, it is looked up for each layer separately in
            ``normalising_coeff_dict`` using the class of the preceding
            activation.
        normal: draw weights from N(0, scale) when True, otherwise from
            U(-scale, scale).

    Raises:
        KeyError: no coefficient was given and the preceding activation has no
            entry in ``normalising_coeff_dict``.
    """
    net_modules = list(network.modules())
    for i, layer in enumerate(net_modules):
        layer_kind = layer.__class__.__name__
        # Index 0 is the root module and has no predecessor; never wrap around
        # to the last module.
        linear_after_activation = (
            layer_kind == "Linear"
            and i > 0
            and net_modules[i - 1].__class__ in activation_functions
        )
        conv_with_coefficient = layer_kind == "Conv2d" and normalising_coefficient is not None
        if not (linear_after_activation or conv_with_coefficient):
            continue

        # Resolve the coefficient into a local so that a looked-up value never
        # leaks into the following layers.
        if normalising_coefficient is not None:
            coefficient = normalising_coefficient
        else:
            activation_cls = net_modules[i - 1].__class__
            if activation_cls not in normalising_coeff_dict:
                raise KeyError(
                    "No normalising coefficient is defined for "
                    + activation_cls.__name__
                    + "; pass normalising_coefficient explicitly"
                )
            coefficient = normalising_coeff_dict[activation_cls]

        receptive_field_size = 1
        for s in layer.weight.shape[2:]:
            receptive_field_size *= s
        scale = float(coefficient / np.sqrt(layer.weight.shape[1] * receptive_field_size))

        with torch.no_grad():
            if normal:
                layer.weight.normal_(0, scale)
            else:
                layer.weight.uniform_(-scale, scale)
            # Layers constructed with bias=False expose ``bias`` as None.
            if layer.bias is not None:
                layer.bias.zero_()


def test_backpropagation(neural_network, data_loader, loss_fn):
    """ Get the backpropagation distributions from running a number of inputs to the network """
    handles = []
    layerID = 0
    backpropagation_values = {}
    # Set hooks
    def set_backward_hooks(name):
        def hook(model, input, output):
            backpropagation_values[name].append(output[0].detach())
        return hook

    # Initialise hook storage holders
    for m in neural_network.modules():
        if m.__class__.__name__ == "Linear" or m.__class__.__name__ == "Conv2d":
            handles.append(m.register_full_backward_hook(set_backward_hooks(layerID)))
            backpropagation_values[layerID] = []
            layerID+=1

    # Run a few inputs through the network and measure backpropagation with hooks
    for batch, (X, y) in enumerate(data_loader):
        pred = neural_network(X)
        loss = loss_fn(pred, y)

        loss.backward()
        if batch > 20:
            break

    # Remove hooks
    for h in handles:
        h.remove()

    return backpropagation_values

def test_backpropagation_scaled(neural_network, data_loader, loss_fn):
    """Unfinished variant of ``test_backpropagation`` that is meant to rescale gradients.

    Backward hooks are registered on every sub-module whose class name is
    ``"Linear"``, and each batch is pushed through a forward and backward pass.
    The per-batch scaling section after ``loss.backward()`` is not runnable as
    written; see the NOTE(review) comments inside the loop.

    Args:
        neural_network: the ``nn.Module`` to probe.
        data_loader: iterable yielding ``(X, y)`` batches.
        loss_fn: callable ``loss_fn(pred, y)`` returning the loss to backpropagate.

    Returns:
        ``backpropagation_values``, a dict keyed by hooked-layer index whose
        values are lists of detached ``output[0]`` tensors from the hooks.
    """
    handles = []
    layerID = 0
    backpropagation_values = {}
    # Factory producing a backward hook that records into backpropagation_values[name]
    def set_backward_hooks(name):
        def hook(model, input, output):
            # For a full backward hook the third argument holds the gradients
            # with respect to the module's outputs.
            backpropagation_values[name].append(output[0].detach())
        return hook

    # Register one hook per Linear layer and create its (initially empty) storage list
    for m in neural_network.modules():
        if m.__class__.__name__ == "Linear":
            handles.append(m.register_full_backward_hook(set_backward_hooks(layerID)))
            backpropagation_values[layerID] = []
            layerID+=1

    # Run a few inputs through the network and measure backpropagation with hooks
    for batch, (X, y) in enumerate(data_loader):
        # Intended goal (per the original note): work out how to scale the
        # backpropagated gradients so that their variance stays constant.
        pred = neural_network(X)
        loss = loss_fn(pred, y)

        loss.backward()

        # Get the variance of the gradient of the 2nd last module (layer before output layer), then mimic that over all layers
        # NOTE(review): the statements below cannot run as written:
        #   * nn.Module.modules() returns an iterator, so subscripting it
        #     ([-2], [0], [1:]) is expected to raise a TypeError - confirm.
        #   * backpropagation_values is a dict (see above) and has no .append().
        #   * .grad is read from modules here; presumably parameter gradients
        #     were meant - confirm.
        #   * `m.grad *= propagated_var/m.grad` would set every element to
        #     propagated_var rather than rescale the variance.
        # The print() looks like leftover debugging output.
        print(neural_network.modules())
        propagated_var = np.var(neural_network.modules()[-2].grad)
        backpropagation_values.append(neural_network.modules()[0])
        for i, m in enumerate(neural_network.modules()[1:]):
            m.grad *= propagated_var/m.grad #/neural_network.modules()[i-1]
            backpropagation_values.append(m.grad)

        # Stops after the batch with index 21 has been processed
        if batch > 20:
            break

    # Remove hooks
    for h in handles:
        h.remove()

    return backpropagation_values

In [3]:
def test_forward_propagation(neural_network, data, loss_fn, device, transform=torchvision.transforms.Compose([]), name="Tanh", num_samples=64):
    """Capture intermediate layer outputs for one batch built from ``data``.

    Forward hooks are registered on every sub-module whose class name is
    ``"Linear"``, ``"Conv2d"`` or ``name``. The first ``num_samples`` inputs of
    ``data`` are stacked into a batch, moved to ``device``, passed through
    ``transform`` and fed to the network once, without tracking gradients. The
    hooks are always removed again, even when the forward pass raises.

    Args:
        neural_network: the ``nn.Module`` to probe. Its train/eval mode is not
            changed by this function.
        data: indexable dataset where ``data[d][0]`` is the input tensor of
            sample ``d``.
        loss_fn: unused; kept so existing call sites keep working.
        device: device the stacked batch is moved to.
        transform: callable applied to the stacked batch before the forward pass.
        name: class name of an additional module type to hook.
        num_samples: number of leading samples to use. The default of 64
            matches the previous hard-coded value.

    Returns:
        dict mapping the hook index (0, 1, ... in ``modules()`` order, counting
        only hooked modules) to that module's detached output tensor.
    """
    layer_intermediate_values = {}
    handles = []

    def make_hook(layer_id):
        def hook(module, inputs, output):
            layer_intermediate_values[layer_id] = output.detach()
        return hook

    watched_class_names = ("Linear", "Conv2d", name)
    try:
        layer_id = 0
        for m in neural_network.modules():
            if m.__class__.__name__ in watched_class_names:
                handles.append(m.register_forward_hook(make_hook(layer_id)))
                layer_id += 1

        batch = torch.stack([data[d][0] for d in range(num_samples)]).to(device)
        # Only the hooked activations are needed, so skip building an autograd graph.
        with torch.no_grad():
            neural_network(transform(batch))
    finally:
        # Never leave probe hooks attached to the caller's network.
        for h in handles:
            h.remove()

    return layer_intermediate_values

In [4]:
# CIFAR10 training set; every image is converted to a tensor and then
# standardised with its own mean and standard deviation.
def _standardise_per_image(x):
    """Normalise one image tensor using the mean and std computed over that tensor."""
    normalise = torchvision.transforms.Normalize(torch.mean(x), torch.std(x), inplace=False)
    return normalise(x)


cifar10_dataset_augmented = datasets.CIFAR10(
    root="./datasets",
    train=True,
    download=True,
    transform=Compose([ToTensor(), _standardise_per_image]),
)

# Transform applied to training inputs: random horizontal flip only.
train_transform = Compose([torchvision.transforms.RandomHorizontalFlip()])
# NOTE(review): the original chained assignment also bound the name
# `transform`; that alias is kept here, but it looks accidental.
transform = train_transform

# Transform applied to validation inputs: identity.
validation_transform = Compose([])
transform = validation_transform

# 45000 / 5000 random split into training and validation subsets.
cifar10_dataset_train, cifar10_dataset_validation = torch.utils.data.random_split(
    cifar10_dataset_augmented,
    [45000, 5000],
)

# Batches of 64; only the training loader shuffles.
cifar10_dataset_loader_train = DataLoader(cifar10_dataset_train, shuffle=True, batch_size=64)
cifar10_dataset_loader_validation = DataLoader(cifar10_dataset_validation, shuffle=False, batch_size=64)

Files already downloaded and verified


In [5]:
# Transforms that rescale the input to half a standard deviation (instead of
# the standard unit-std normalisation).
def _standardise_to_half_std(x):
    """Normalise ``x`` in place with its own mean and std, then return it multiplied by 0.5."""
    return torchvision.transforms.Normalize(torch.mean(x), torch.std(x), inplace=True)(x) * 0.5


# Training variant: random horizontal flip followed by the half-std rescale.
train_transform_half_std = Compose([
    torchvision.transforms.RandomHorizontalFlip(),
    _standardise_to_half_std,
])
# NOTE(review): the original chained assignment also bound the name
# `transform`; that alias is kept here, but it looks accidental.
transform = train_transform_half_std

# Validation variant: half-std rescale only.
validation_transform_half_std = Compose([_standardise_to_half_std])
transform = validation_transform_half_std

In [6]:

# VGG-style network with 18 weight layers (14 convolutional + 4 fully connected)
class NNCIFAR10(nn.Module):
    """
    VGG-style classifier for 3-channel 32x32 inputs with 10 output logits.

    The stack has five convolutional stages of 3x3 convolutions, each stage
    closed by a 2x2 max-pool, followed by Flatten and four Linear layers with
    Dropout after the first three. The first four stages use padding=2, the
    last stage uses padding=0. Every Conv2d and every Linear except the last
    is followed by a fresh instance of ``activation_function``.
    """

    def __init__(self, activation_function):
        super().__init__()

        # (output channels of each convolution in the stage, padding of the stage)
        conv_stages = (
            ((64, 64), 2),                # conv 1-2
            ((128, 128), 2),              # conv 3-4
            ((256, 256, 256, 256), 2),    # conv 5-8
            ((512, 512, 512, 512), 2),    # conv 9-12
            ((512, 512), 0),              # conv 13-14
        )
        # (output features, dropout probability) of the hidden Linear layers 15-17
        hidden_layers = ((2048, 0.25), (2048, 0.4), (1024, 0.25))

        layers = []
        channels = 3  # the input is layer 0
        for widths, padding in conv_stages:
            for width in widths:
                layers.append(nn.Conv2d(channels, width, (3, 3), padding=padding))
                layers.append(activation_function())
                channels = width
            layers.append(nn.MaxPool2d((2, 2)))

        layers.append(nn.Flatten())

        features = 2048
        for width, drop_probability in hidden_layers:
            layers.append(nn.Linear(features, width))
            layers.append(activation_function())
            layers.append(nn.Dropout(drop_probability))
            features = width
        layers.append(nn.Linear(features, 10))  # layer 18, the output logits

        self.module_stack = nn.Sequential(*layers)

    def forward(self, x):
        return self.module_stack(x)

In [7]:
def training_loop(network,
                  optimiser,
                  dataset_loader_train,
                  dataset_loader_validation,
                  loss_arr, acc_validation,
                  epochs=40,
                  train_transform=train_transform,
                  validation_transform=validation_transform):
  """Train ``network`` and cut the learning rate when validation accuracy stalls.

  Relies on the notebook-level globals ``loss_fn``, ``device``, ``print_every``
  and ``store_loss_every``, which must be defined before this is called.

  Every ``print_every`` batches the validation accuracy is measured with
  ``get_accuracy`` and appended to ``acc_validation`` as a one-element list.
  Once more than 25 evaluations have been counted, a straight line is fitted
  to the last 25 accuracies; when its slope is below 0.00001 the learning rate
  of every parameter group is multiplied by 0.1 and the evaluation counter is
  reset to 10. Every ``store_loss_every`` batches the current loss is appended
  to ``loss_arr``.

  Args:
      network: model to train, in place.
      optimiser: optimiser built over ``network``'s parameters.
      dataset_loader_train: iterable of ``(X, y)`` training batches.
      dataset_loader_validation: loader passed to ``get_accuracy``.
      loss_arr: list that receives the sampled loss values.
      acc_validation: list that receives ``[accuracy]`` entries.
      epochs: number of passes over ``dataset_loader_train``.
      train_transform: callable applied to each training batch ``X``.
      validation_transform: callable passed to ``get_accuracy``.
  """
  trend_window = 25  # number of accuracy samples used for the slope fit
  trend_model = linear_model.LinearRegression()
  batch_count = 0

  for i in range(epochs):
      print()
      print("=================================")
      print("Epoch: " + str(i + 1))
      print("=================================")
      for batch, (X, y) in enumerate(dataset_loader_train):
          loss = loss_fn(network(train_transform(X).to(device)), y.to(device))
          optimiser.zero_grad()
          loss.backward()
          optimiser.step()

          # Periodic validation, progress line and plateau detection
          if batch % print_every == 0:
              cur_acc_val = get_accuracy(network, dataset_loader_validation, device, validation_transform)
              acc_validation.append([cur_acc_val])
              if batch_count > trend_window:
                  acc_grad = trend_model.fit(np.arange(trend_window).reshape(-1, 1), acc_validation[-trend_window:]).coef_[0]
                  print("\rloss: " + str(loss.item()) + " | acc: " + str(cur_acc_val) + " | acc_grad: " + str(acc_grad) + "                        ", end="")
                  if acc_grad < 0.00001:
                      # Validation accuracy has stopped improving, decrease learning rate
                      for g in optimiser.param_groups:
                          g['lr'] = g['lr'] * 0.1
                      # Report and reset once per decay, not once per parameter group.
                      print()
                      print("Decreased Learning Rate by a factor of 10")
                      batch_count = 10
              else:
                  batch_count += 1
                  print("\rloss: " + str(loss.item()) + " acc: " + str(cur_acc_val) + "                        ", end="")
          if batch % store_loss_every == 0:
              loss_arr.append(loss.item())

def run_test(initialisation,
             activation = nn.Tanh,
             dataset_loader_train = cifar10_dataset_loader_train,
             dataset_loader_validation = cifar10_dataset_loader_validation,
             train_transform=train_transform,
             validation_transform=validation_transform,
             epochs=40):
  """Build, initialise and train one ``NNCIFAR10`` network.

  The network is created on the notebook-level ``device``, passed to
  ``initialisation`` and trained by ``training_loop`` with SGD
  (lr=0.001, momentum=0.9, weight_decay=1e-5).

  Args:
      initialisation: callable taking the network and initialising it in place.
      activation: activation class handed to ``NNCIFAR10``.
      dataset_loader_train: loader with the training batches.
      dataset_loader_validation: loader with the validation batches.
      train_transform: callable applied to each training batch.
      validation_transform: callable applied to each validation batch.
      epochs: number of training epochs; the default of 40 matches the
          previous hard-coded value.

  Returns:
      Tuple ``(loss_arr, acc_validation, network, optimiser)``.
  """
  network = NNCIFAR10(activation).to(device)
  initialisation(network)
  optimiser = torch.optim.SGD(network.parameters(), lr=0.001, momentum=0.9,
                            weight_decay=1e-5)

  loss_arr = []
  acc_validation = []

  # Forward the transforms explicitly; they were previously accepted here but
  # never handed on, so training_loop always used its own defaults.
  training_loop(
      network,
      optimiser,
      dataset_loader_train,
      dataset_loader_validation,
      loss_arr,
      acc_validation,
      epochs,
      train_transform=train_transform,
      validation_transform=validation_transform)

  return loss_arr, acc_validation, network, optimiser

def get_accuracy(model, dataloader, device, transform):
  """Return the top-1 accuracy of ``model`` over all batches in ``dataloader``.

  The model is switched to eval mode for the measurement and afterwards put
  back into whichever mode it was in before the call, also when an exception
  is raised.

  Args:
      model: the ``nn.Module`` to evaluate.
      dataloader: iterable of ``(X, y)`` batches; ``y`` holds class indices.
      device: device the inputs and labels are moved to.
      transform: callable applied to ``X`` before it is moved to ``device``.

  Returns:
      Fraction of correctly classified samples as a Python float.

  Raises:
      ValueError: the dataloader yielded no samples.
  """
  was_training = model.training
  model.eval()
  try:
    with torch.no_grad():
      correct = 0
      seen = 0
      for X, y in dataloader:
        predicted = torch.argmax(model(transform(X).to(device)), dim=1)
        correct += (predicted == y.to(device)).int().sum()
        seen += len(y)
  finally:
    # Restore the caller's mode instead of forcing train mode.
    model.train(was_training)
  if seen == 0:
    raise ValueError("get_accuracy needs at least one sample; the dataloader yielded none")
  return (correct / seen).item()

def save_arr(name, arr):
  """Write the items of ``arr`` as one comma-separated line to ``name`` under /content/drive/MyDrive/Datasets/CIFARDataset/."""
  target_path = "/content/drive/MyDrive/Datasets/CIFARDataset/" + name
  with open(target_path, "w+") as f:
    f.write(",".join(map(str, arr)))
    
def save_net(name, network):
  """Serialise ``network`` with ``torch.save`` to ``name`` under /content/drive/MyDrive/Datasets/CIFARDataset/."""
  target_path = "/content/drive/MyDrive/Datasets/CIFARDataset/" + name
  torch.save(network, target_path)

def save_arr_local(name, arr, itera=0):
  """Write ``arr`` as one comma-separated line to ./HeCIFAR_test/<itera>/<name>.

  The run directory is created when it does not exist yet, so a result is not
  lost at the end of a training run just because the folder is missing.

  Args:
      name: file name inside the run directory.
      arr: iterable whose items are written via ``str()``.
      itera: run index used as the sub-directory name.
  """
  run_dir = "./HeCIFAR_test/" + str(itera)
  os.makedirs(run_dir, exist_ok=True)
  with open(run_dir + "/" + name, "w") as f:
    f.write(",".join([str(i) for i in arr]))
    
def save_net_local(name, network, itera=0):
  """Serialise ``network`` with ``torch.save`` to ./HeCIFAR_test/<itera>/<name>.

  The run directory is created when it does not exist yet.

  Args:
      name: file name inside the run directory.
      network: object handed to ``torch.save`` (the notebook passes models and
          optimisers).
      itera: run index used as the sub-directory name.
  """
  run_dir = "./HeCIFAR_test/" + str(itera)
  os.makedirs(run_dir, exist_ok=True)
  torch.save(network, run_dir + "/" + name)

def load_arr_local(name, arr=None):
  """Read a comma-separated list of floats from ./<name>.

  The file is opened read-only; the previous "w+" mode truncated it before
  reading and so destroyed the saved data. Items wrapped in square brackets,
  such as the ``[accuracy]`` entries that ``training_loop`` stores, are
  unwrapped before conversion.

  Args:
      name: path of the file, relative to the current directory.
      arr: unused; kept (now optional) so existing call sites keep working.

  Returns:
      List of floats; an empty list for an empty file.
  """
  with open("./" + name, "r") as f:
    content = f.read().strip()
  if not content:
    return []
  return [float(item.strip().strip("[]")) for item in content.split(",")]
    
def load_net_local(name, network=None):
  """Load an object saved with ``torch.save`` from ./<name>.

  Previously the arguments to ``torch.load`` were swapped: ``network`` was
  passed as the file and the path as ``map_location``.

  NOTE: ``torch.load`` unpickles the file, so only load files you trust.
  NOTE(review): recent PyTorch releases default to ``weights_only=True``,
  which rejects fully pickled modules such as those written by
  ``save_net_local``; confirm the PyTorch version in use.

  Args:
      name: path of the file, relative to the current directory.
      network: unused; kept (now optional) so existing call sites keep working.

  Returns:
      The deserialised object.
  """
  return torch.load("./" + name)


In [8]:
# Notebook-level objects and settings. training_loop reads loss_fn,
# print_every, store_loss_every and device from here at call time.
loss_fn = nn.CrossEntropyLoss()
lr = linear_model.LinearRegression()

epochs, num_epochs, batch_count = 5, 0, 0
print_every, store_loss_every = 100, 10

# Use the GPU when one is available.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

cuda


In [9]:
# Five runs: ReLU activation, normal-distributed ConstVarInitialisation with
# coefficient 1.4142. Results are stored under the relu_he_* names of run i.
for i in range(5):
    relu_loss_arr, relu_acc_validation, relu_model, relu_optimiser = run_test(
        lambda net: ConstVarInitialisation(net, 1.4142, normal=True),
        activation=torch.nn.ReLU,
    )
    # Move the trained model to the CPU before it is serialised.
    relu_model.to("cpu")

    save_arr_local("relu_he_loss_arr", relu_loss_arr, i)
    save_arr_local("relu_he_acc_validation", relu_acc_validation, i)

    save_net_local("relu_he_model", relu_model, i)
    save_net_local("relu_he_optimiser", relu_optimiser, i)

    # Drop the references so the next run starts without the previous results.
    del relu_loss_arr, relu_acc_validation, relu_model, relu_optimiser

print()


Epoch: 1
loss: 1.5888805389404297 acc: 0.36159998178482056                        
Epoch: 2
loss: 1.4336286783218384 acc: 0.4777999818325043                         
Epoch: 3
loss: 1.3920990228652954 acc: 0.515999972820282                          
Epoch: 4
loss: 1.257878303527832 | acc: 0.5719999670982361 | acc_grad: [0.00880231]                         
Epoch: 5
loss: 1.1796770095825195 | acc: 0.6173999905586243 | acc_grad: [0.00633092]                        
Epoch: 6
loss: 1.065219521522522 | acc: 0.6340000033378601 | acc_grad: [0.00549708]                         
Epoch: 7
loss: 0.8841147422790527 | acc: 0.6678000092506409 | acc_grad: [0.00456246]                        
Epoch: 8
loss: 0.7855876088142395 | acc: 0.6681999564170837 | acc_grad: [0.00311831]                        
Epoch: 9
loss: 0.7103956937789917 | acc: 0.7143999934196472 | acc_grad: [0.00289492]                        
Epoch: 10
loss: 0.7164866328239441 | acc: 0.7181999683380127 | acc_grad: [0.00249785]           

In [10]:
# Five runs: ELU activation, normal-distributed ConstVarInitialisation with
# coefficient 1.4142. Results are stored under the elu_relu_normal_* names.
for i in range(5):
    elu_relu_normal_loss_arr, elu_relu_normal_acc_validation, elu_relu_normal_model, elu_relu_normal_optimiser = run_test(
        lambda net: ConstVarInitialisation(net, normalising_coefficient=1.4142, normal=True),
        activation=torch.nn.ELU,
    )
    # Move the trained model to the CPU before it is serialised.
    elu_relu_normal_model.to("cpu")

    save_arr_local("elu_relu_normal_loss_arr", elu_relu_normal_loss_arr, i)
    save_arr_local("elu_relu_normal_acc_validation", elu_relu_normal_acc_validation, i)

    save_net_local("elu_relu_normal_model", elu_relu_normal_model, i)
    save_net_local("elu_relu_normal_optimiser", elu_relu_normal_optimiser, i)

    # Drop the references so the next run starts without the previous results.
    del elu_relu_normal_loss_arr, elu_relu_normal_acc_validation
    del elu_relu_normal_model, elu_relu_normal_optimiser

    print()


Epoch: 1
loss: 1.480309009552002 acc: 0.5027999877929688                          
Epoch: 2
loss: 1.2273972034454346 acc: 0.6229999661445618                        
Epoch: 3
loss: 0.951956570148468 acc: 0.6687999963760376                         
Epoch: 4
loss: 0.805854082107544 | acc: 0.7053999900817871 | acc_grad: [0.00743862]                         
Epoch: 5
loss: 0.974303126335144 | acc: 0.7215999960899353 | acc_grad: [0.00476862]                         
Epoch: 6
loss: 0.5012548565864563 | acc: 0.7337999939918518 | acc_grad: [0.00332185]                        
Epoch: 7
loss: 0.6713380217552185 | acc: 0.7439999580383301 | acc_grad: [0.00191938]                         
Epoch: 8
loss: 0.4169107973575592 | acc: 0.7541999816894531 | acc_grad: [0.00143615]                         
Epoch: 9
loss: 0.5556170344352722 | acc: 0.7673999667167664 | acc_grad: [0.00140692]                         
Epoch: 10
loss: 0.4446567893028259 | acc: 0.7716000080108643 | acc_grad: [0.00130431]          

In [11]:
# Five runs: ELU activation, normal-distributed ConstVarInitialisation with
# coefficient 1.2453. Results are stored under the elu_const_var_* names.
for i in range(5):
    elu_const_var_loss_arr, elu_const_var_acc_validation, elu_const_var_model, elu_const_var_optimiser = run_test(
        lambda net: ConstVarInitialisation(net, 1.2453, normal=True),
        activation=torch.nn.ELU,
    )
    # Move the trained model to the CPU before it is serialised.
    elu_const_var_model.to("cpu")

    save_arr_local("elu_const_var_loss_arr", elu_const_var_loss_arr, i)
    save_arr_local("elu_const_var_acc_validation", elu_const_var_acc_validation, i)

    save_net_local("elu_const_var_model", elu_const_var_model, i)
    save_net_local("elu_const_var_optimiser", elu_const_var_optimiser, i)

    # Drop the references so the next run starts without the previous results.
    del elu_const_var_loss_arr, elu_const_var_acc_validation
    del elu_const_var_model, elu_const_var_optimiser

    print()


Epoch: 1
loss: 1.2840454578399658 acc: 0.5051999688148499                         
Epoch: 2
loss: 1.2879178524017334 acc: 0.5839999914169312                         
Epoch: 3
loss: 0.9903225898742676 acc: 0.6577999591827393                        
Epoch: 4
loss: 0.814389705657959 | acc: 0.6881999969482422 | acc_grad: [0.00873723]                         
Epoch: 5
loss: 0.660413920879364 | acc: 0.7116000056266785 | acc_grad: [0.00529277]                         
Epoch: 6
loss: 0.8099545240402222 | acc: 0.718999981880188 | acc_grad: [0.00330277]                         
Epoch: 7
loss: 0.6088966727256775 | acc: 0.7441999912261963 | acc_grad: [0.00265554]                         
Epoch: 8
loss: 0.5110785365104675 | acc: 0.7505999803543091 | acc_grad: [0.00165277]                        
Epoch: 9
loss: 0.46197670698165894 | acc: 0.7599999904632568 | acc_grad: [0.00168031]                        
Epoch: 10
loss: 0.33135178685188293 | acc: 0.7705999612808228 | acc_grad: [0.00106769]         

In [12]:
# Five runs: GELU activation, normal-distributed ConstVarInitialisation with
# coefficient 1.4142. Results are stored under the gelu_relu_* names.
for i in range(5):
    gelu_relu_loss_arr, gelu_relu_acc_validation, gelu_relu_model, gelu_relu_optimiser = run_test(
        lambda net: ConstVarInitialisation(net, normalising_coefficient=1.4142, normal=True),
        activation=torch.nn.GELU,
    )
    # Move the trained model to the CPU before it is serialised.
    gelu_relu_model.to("cpu")

    save_arr_local("gelu_relu_loss_arr", gelu_relu_loss_arr, i)
    save_arr_local("gelu_relu_acc_validation", gelu_relu_acc_validation, i)

    save_net_local("gelu_relu_model", gelu_relu_model, i)
    save_net_local("gelu_relu_optimiser", gelu_relu_optimiser, i)

    # Drop the references so the next run starts without the previous results.
    del gelu_relu_loss_arr, gelu_relu_acc_validation
    del gelu_relu_model, gelu_relu_optimiser

    print()


Epoch: 1
loss: 1.7239739894866943 acc: 0.34759998321533203                        
Epoch: 2
loss: 1.5655255317687988 acc: 0.493399977684021                          
Epoch: 3
loss: 1.3200902938842773 acc: 0.5730000138282776                         
Epoch: 4
loss: 1.1162800788879395 | acc: 0.6011999845504761 | acc_grad: [0.00918308]                        
Epoch: 5
loss: 1.0948765277862549 | acc: 0.6363999843597412 | acc_grad: [0.00669031]                        
Epoch: 6
loss: 0.8326551914215088 | acc: 0.670199990272522 | acc_grad: [0.00511861]                         
Epoch: 7
loss: 0.7124075889587402 | acc: 0.7077999711036682 | acc_grad: [0.00460646]                        
Epoch: 8
loss: 0.7743209600448608 | acc: 0.7239999771118164 | acc_grad: [0.004138]                          
Epoch: 9
loss: 0.5661620497703552 | acc: 0.7375999689102173 | acc_grad: [0.00258431]                        
Epoch: 10
loss: 0.6846035122871399 | acc: 0.7507999539375305 | acc_grad: [0.00166677]           

In [13]:
# Five runs: GELU activation, normal-distributed ConstVarInitialisation with
# coefficient 1.5331. Results are stored under the gelu_const_var_* names.
for i in range(5):
    gelu_const_var_loss_arr, gelu_const_var_acc_validation, gelu_const_var_model, gelu_const_var_optimiser = run_test(
        lambda net: ConstVarInitialisation(net, 1.5331, normal=True),
        activation=torch.nn.GELU,
    )
    # Move the trained model to the CPU before it is serialised.
    gelu_const_var_model.to("cpu")

    save_arr_local("gelu_const_var_loss_arr", gelu_const_var_loss_arr, i)
    save_arr_local("gelu_const_var_acc_validation", gelu_const_var_acc_validation, i)

    save_net_local("gelu_const_var_model", gelu_const_var_model, i)
    save_net_local("gelu_const_var_optimiser", gelu_const_var_optimiser, i)

    # Drop the references so the next run starts without the previous results.
    del gelu_const_var_loss_arr, gelu_const_var_acc_validation
    del gelu_const_var_model, gelu_const_var_optimiser

    print()


Epoch: 1
loss: 1.5837266445159912 acc: 0.43219998478889465                        
Epoch: 2
loss: 1.3241342306137085 acc: 0.517799973487854                          
Epoch: 3
loss: 1.435797095298767 acc: 0.5465999841690063                          
Epoch: 4
loss: 1.0159192085266113 | acc: 0.592799961566925 | acc_grad: [0.00763308]                         
Epoch: 5
loss: 1.0219930410385132 | acc: 0.6484000086784363 | acc_grad: [0.00659169]                        
Epoch: 6
loss: 1.0207669734954834 | acc: 0.6800000071525574 | acc_grad: [0.00476923]                        
Epoch: 7
loss: 1.0933377742767334 | acc: 0.7053999900817871 | acc_grad: [0.00372015]                        
Epoch: 8
loss: 0.6805068850517273 | acc: 0.7202000021934509 | acc_grad: [0.00327785]                        
Epoch: 9
loss: 0.5108701586723328 | acc: 0.7369999885559082 | acc_grad: [0.00245523]                        
Epoch: 10
loss: 0.6718326210975647 | acc: 0.7423999905586243 | acc_grad: [0.00169369]           

In [14]:
# Five runs: SiLU activation, normal-distributed ConstVarInitialisation with
# coefficient 1.4142. Results are stored under the silu_relu_* names.
for i in range(0, 5):
    silu_relu_loss_arr, silu_relu_acc_validation, silu_relu_model, silu_relu_optimiser = run_test(lambda x: ConstVarInitialisation(x, normalising_coefficient=1.4142, normal=True), activation=torch.nn.SiLU)
    # Move the trained model to the CPU before it is serialised.
    silu_relu_model.to("cpu")
    save_arr_local("silu_relu_loss_arr", silu_relu_loss_arr, i)
    save_arr_local("silu_relu_acc_validation", silu_relu_acc_validation, i)

    save_net_local("silu_relu_model", silu_relu_model, i)
    # The optimiser gets its own file; it used to be written to
    # "silu_relu_model" and so overwrote the model saved one line earlier.
    save_net_local("silu_relu_optimiser", silu_relu_optimiser, i)

    del silu_relu_loss_arr
    del silu_relu_acc_validation
    del silu_relu_model
    del silu_relu_optimiser

    print()


Epoch: 1
loss: 1.9155768156051636 acc: 0.2897999882698059                         
Epoch: 2
loss: 1.5616042613983154 acc: 0.44919997453689575                        
Epoch: 3
loss: 1.338944911956787 acc: 0.5083999633789062                          
Epoch: 4
loss: 1.1093645095825195 | acc: 0.5830000042915344 | acc_grad: [0.01157169]                        
Epoch: 5
loss: 1.0710248947143555 | acc: 0.6146000027656555 | acc_grad: [0.00808215]                        
Epoch: 6
loss: 0.9262204766273499 | acc: 0.6516000032424927 | acc_grad: [0.00587539]                        
Epoch: 7
loss: 0.9672247767448425 | acc: 0.6841999888420105 | acc_grad: [0.00408569]                        
Epoch: 8
loss: 0.7887572050094604 | acc: 0.6973999738693237 | acc_grad: [0.00363815]                        
Epoch: 9
loss: 0.5908617377281189 | acc: 0.7077999711036682 | acc_grad: [0.00265585]                        
Epoch: 10
loss: 0.49504780769348145 | acc: 0.739799976348877 | acc_grad: [0.00244446]           

In [15]:
# Five runs: SiLU activation, normal-distributed ConstVarInitialisation with
# coefficient 1.6766. Results are stored under the silu_const_var_* names.
for i in range(5):
    silu_const_var_loss_arr, silu_const_var_acc_validation, silu_const_var_model, silu_const_var_optimiser = run_test(
        lambda net: ConstVarInitialisation(net, 1.6766, normal=True),
        activation=torch.nn.SiLU,
    )
    # Move the trained model to the CPU before it is serialised.
    silu_const_var_model.to("cpu")

    save_arr_local("silu_const_var_loss_arr", silu_const_var_loss_arr, i)
    save_arr_local("silu_const_var_acc_validation", silu_const_var_acc_validation, i)

    save_net_local("silu_const_var_model", silu_const_var_model, i)
    save_net_local("silu_const_var_optimiser", silu_const_var_optimiser, i)

    # Drop the references so the next run starts without the previous results.
    del silu_const_var_loss_arr, silu_const_var_acc_validation
    del silu_const_var_model, silu_const_var_optimiser

    print()


Epoch: 1
loss: 1.7536481618881226 acc: 0.4397999942302704                         
Epoch: 2
loss: 1.1916961669921875 acc: 0.5327999591827393                         
Epoch: 3
loss: 1.4627113342285156 acc: 0.5740000009536743                        
Epoch: 4
loss: 1.1275980472564697 | acc: 0.6169999837875366 | acc_grad: [0.00812092]                        
Epoch: 5
loss: 1.1130552291870117 | acc: 0.6491999626159668 | acc_grad: [0.00477815]                        
Epoch: 6
loss: 0.783696174621582 | acc: 0.6909999847412109 | acc_grad: [0.00442846]                         
Epoch: 7
loss: 0.8731233477592468 | acc: 0.7111999988555908 | acc_grad: [0.00391554]                        
Epoch: 8
loss: 0.5483621954917908 | acc: 0.6823999881744385 | acc_grad: [0.00264046]                        
Epoch: 9
loss: 0.6165294051170349 | acc: 0.738599956035614 | acc_grad: [0.00192292]                         
Epoch: 10
loss: 0.5415131449699402 | acc: 0.7459999918937683 | acc_grad: [0.00165831]            

In [16]:
# Five runs: ReLU activation with Xavier uniform initialisation. Results are
# stored under the relu_xavier_* names.
for i in range(0, 5):
    relu_xavier_loss_arr, relu_xavier_acc_validation, relu_xavier_model, relu_xavier_optimiser = run_test(lambda x: XavierInitialisation(x), activation=torch.nn.ReLU)
    # Move the trained model to the CPU before it is serialised.
    relu_xavier_model.to("cpu")

    save_arr_local("relu_xavier_const_var_loss_arr", relu_xavier_loss_arr, i)
    save_arr_local("relu_xavier_acc_validation", relu_xavier_acc_validation, i)

    save_net_local("relu_xavier_model", relu_xavier_model, i)
    # The optimiser gets its own file; it used to be written to
    # "relu_xavier_model" and so overwrote the model saved one line earlier.
    save_net_local("relu_xavier_optimiser", relu_xavier_optimiser, i)

    del relu_xavier_loss_arr
    del relu_xavier_acc_validation
    del relu_xavier_model
    del relu_xavier_optimiser

    print()


Epoch: 1
loss: 2.3033862113952637 acc: 0.09679999947547913                        
Epoch: 2
loss: 2.2967543601989746 acc: 0.2133999913930893                         
Epoch: 3
loss: 2.0559635162353516 acc: 0.24479998648166656                        
Epoch: 4
loss: 1.633154273033142 | acc: 0.3211999833583832 | acc_grad: [0.00918262]                          
Epoch: 5
loss: 1.6973848342895508 | acc: 0.35420000553131104 | acc_grad: [0.00751462]                        
Epoch: 6
loss: 1.5837205648422241 | acc: 0.3840000033378601 | acc_grad: [0.00696385]                         
Epoch: 7
loss: 1.354397177696228 | acc: 0.4657999873161316 | acc_grad: [0.00594631]                          
Epoch: 8
loss: 1.2317569255828857 | acc: 0.5009999871253967 | acc_grad: [0.00678169]                         
Epoch: 9
loss: 1.3739430904388428 | acc: 0.5365999937057495 | acc_grad: [0.00586569]                         
Epoch: 10
loss: 1.3959945440292358 | acc: 0.5703999996185303 | acc_grad: [0.00400646]     

In [17]:
# Five runs: SELU activation, normal-distributed ConstVarInitialisation with
# coefficient 1.0000. Results are stored under the selu_const_var_* names.
for i in range(5):
    selu_const_var_loss_arr, selu_const_var_acc_validation, selu_const_var_model, selu_const_var_optimiser = run_test(
        lambda net: ConstVarInitialisation(net, 1.0000, normal=True),
        activation=torch.nn.SELU,
    )
    # Move the trained model to the CPU before it is serialised.
    selu_const_var_model.to("cpu")

    save_arr_local("selu_const_var_loss_arr", selu_const_var_loss_arr, i)
    save_arr_local("selu_const_var_acc_validation", selu_const_var_acc_validation, i)

    save_net_local("selu_const_var_model", selu_const_var_model, i)
    save_net_local("selu_const_var_optimiser", selu_const_var_optimiser, i)

    # Drop the references so the next run starts without the previous results.
    del selu_const_var_loss_arr, selu_const_var_acc_validation
    del selu_const_var_model, selu_const_var_optimiser

    print()


Epoch: 1
loss: 1.1822665929794312 acc: 0.5525999665260315                         
Epoch: 2
loss: 1.1515467166900635 acc: 0.6299999952316284                        
Epoch: 3
loss: 0.9912800192832947 acc: 0.6935999989509583                        
Epoch: 4
loss: 0.7519784569740295 | acc: 0.7233999967575073 | acc_grad: [0.00797723]                        
Epoch: 5
loss: 1.0060347318649292 | acc: 0.7235999703407288 | acc_grad: [0.00426769]                        
Epoch: 6
loss: 0.3980664610862732 | acc: 0.7613999843597412 | acc_grad: [0.002544]                           
Epoch: 7
loss: 0.37546849250793457 | acc: 0.7635999917984009 | acc_grad: [0.00158338]                        
Epoch: 8
loss: 0.4101008176803589 | acc: 0.7730000019073486 | acc_grad: [0.00178723]                         
Epoch: 9
loss: 0.6122859120368958 | acc: 0.7767999768257141 | acc_grad: [0.00121061]                         
Epoch: 10
loss: 0.48173952102661133 | acc: 0.7703999876976013 | acc_grad: [0.000772]          

In [18]:
# Five runs: SELU activation, normal-distributed ConstVarInitialisation with
# coefficient 1.4142. Results are stored under the selu_he_* names.
for i in range(5):
    selu_he_loss_arr, selu_he_acc_validation, selu_he_model, selu_he_optimiser = run_test(
        lambda net: ConstVarInitialisation(net, normalising_coefficient=1.4142, normal=True),
        activation=torch.nn.SELU,
    )
    # Move the trained model to the CPU before it is serialised.
    selu_he_model.to("cpu")

    save_arr_local("selu_he_loss_arr", selu_he_loss_arr, i)
    save_arr_local("selu_he_acc_validation", selu_he_acc_validation, i)

    save_net_local("selu_he_model", selu_he_model, i)
    save_net_local("selu_he_optimiser", selu_he_optimiser, i)

    # Drop the references so the next run starts without the previous results.
    del selu_he_loss_arr, selu_he_acc_validation
    del selu_he_model, selu_he_optimiser

    print()


Epoch: 1
loss: 1.7813655138015747 acc: 0.483599990606308                          
Epoch: 2
loss: 1.0469251871109009 acc: 0.5807999968528748                        
Epoch: 3
loss: 0.9005596041679382 acc: 0.6377999782562256                        
Epoch: 4
loss: 0.7905160188674927 | acc: 0.6588000059127808 | acc_grad: [0.00724169]                        
Epoch: 5
loss: 0.6445820331573486 | acc: 0.6959999799728394 | acc_grad: [0.00514677]                        
Epoch: 6
loss: 0.7522552013397217 | acc: 0.6771999597549438 | acc_grad: [0.00326031]                        
Epoch: 7
loss: 0.7467121481895447 | acc: 0.7027999758720398 | acc_grad: [0.00220031]                        
Epoch: 8
loss: 0.45383191108703613 | acc: 0.7437999844551086 | acc_grad: [0.00191785]                        
Epoch: 9
loss: 0.4298092722892761 | acc: 0.7545999884605408 | acc_grad: [0.00222369]                        
Epoch: 10
loss: 0.5727920532226562 | acc: 0.7475999593734741 | acc_grad: [0.00137631]            

In [27]:
# Single run: ReLU activation with Xavier normal initialisation. The results
# stay in the notebook namespace so a later cell can save them.
(relu_xavier_normal_loss_arr,
 relu_xavier_normal_acc_validation,
 relu_xavier_normal_model,
 relu_xavier_normal_optimiser) = run_test(XavierInitialisationNormal, activation=torch.nn.ReLU)
# Move the trained model to the CPU before it is serialised.
relu_xavier_normal_model.to("cpu")

print()


Epoch: 1
loss: 2.3050475120544434 acc: 0.10080000013113022                        
Epoch: 2
loss: 2.1452205181121826 | acc: 0.19759999215602875 | acc_grad: [0.00534246]                        
Epoch: 3
loss: 1.7694549560546875 | acc: 0.3346000015735626 | acc_grad: [0.00791738]                         
Epoch: 4
loss: 1.6861920356750488 | acc: 0.42179998755455017 | acc_grad: [0.00558862]                        
Epoch: 5
loss: 1.6046465635299683 | acc: 0.483599990606308 | acc_grad: [0.00523523]                          
Epoch: 6
loss: 1.1855528354644775 | acc: 0.5515999794006348 | acc_grad: [0.00390815]                         
Epoch: 7
loss: 1.2513983249664307 | acc: 0.5920000076293945 | acc_grad: [0.00372723]                        
Epoch: 8
loss: 0.8749487400054932 | acc: 0.6211999654769897 | acc_grad: [0.00264062]                        
Epoch: 9
loss: 0.8163394331932068 | acc: 0.6588000059127808 | acc_grad: [0.00228831]                        
Epoch: 10
loss: 0.8594180941581726 | ac

In [28]:
# Persist the Xavier-normal run; itera=0 is the helpers' default run index.
save_arr_local("relu_xavier_normal_const_var_loss_arr", relu_xavier_normal_loss_arr, itera=0)
save_arr_local("relu_xavier_normal_acc_validation", relu_xavier_normal_acc_validation, itera=0)

save_net_local("relu_xavier_normal_model", relu_xavier_normal_model, itera=0)
save_net_local("relu_xavier_normal_optimiser", relu_xavier_normal_optimiser, itera=0)