## One-Hot-Encoding Test Notebook

Network class.
Contains the nueral network object w/ all features.

In [1]:
# Network Class

import torch.nn as nn # Nueral network module.
from collections import OrderedDict # Dictinary module.

class NN(nn.Module):

    def __init__(self, input_size, middle_width, num_classes):


        super(NN, self).__init__()
        self.features = nn.Sequential(OrderedDict([
            ('hidden_layer', nn.Linear(input_size, middle_width)),
            ('hidden_activation', nn.ReLU()),
        ]))
        self.readout = nn.Linear(middle_width, num_classes)

    def forward(self, x):
        x = self.features(x)
        x = self.readout(x)

        return x

Training Functions

In [2]:
import torch  # Base torch library
from torch.utils.data import DataLoader  # Minibathces
import torchvision.datasets as datasets  # MNIST dataset
import torchvision.transforms as transforms
import numpy as np


def set_device():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    return device


def mnist_dataset(batch_size, train=True, values=list(range(10))):
    # Initializing MNIST data set.
    dataset = datasets.MNIST(root='dataset/', train=train, transform=transforms.ToTensor(), download=True)

    targets_list = dataset.targets.tolist()
    values_index = [i for i in range(len(dataset)) if targets_list[i] in values]

    # Creating a subset of ### MNIST targets.
    subset = torch.utils.data.Subset(dataset, values_index)
    loader = DataLoader(dataset=subset, batch_size=batch_size, shuffle=True)

    return loader


def train(loader, device, model, loss_function, optimizer_function, values=list(range(10))):
    # Training on each data point.

    # Set array full of zeros.
    kernel_alignments = torch.zeros(len(loader))

    for batch_idx, (data, targets) in enumerate(loader):
        data = data.reshape(data.shape[0], -1).to(device=device)
        targets = targets.to(device=device)

        # Forwards.
        scores = model(data)
        loss = loss_function(scores, classify_targets(targets, values))

        # Backwards.
        optimizer_function.zero_grad()
        loss.backward()

        optimizer_function.step()
        phi = model.features(data)

        kernel_alignments[batch_idx] = kernel_calc(targets, phi)

    return torch.mean(kernel_alignments).item(), torch.std(kernel_alignments).item()/len(kernel_alignments)
    # return mean and STD or STE of kernel alignment


def record_accuracy(device, model, train_loader, test_loader, epoch, ste, mean, values=list(range(10))):
    epoch_accuracy = np.array([[
        epoch + 1,
        check_accuracy(device, model, train_loader, values).cpu(),
        check_accuracy(device, model, test_loader, values).cpu(),
        mean,
        ste
    ]])

    return epoch_accuracy


def check_accuracy(device, model, loader, values=list(range(10))):
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = classify_targets(y, values).to(device=device)
            x = x.reshape(x.shape[0], -1)

            scores = model(x)
            # 64images x 10,

            predictions = scores.argmax(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

    return num_correct / num_samples


def classify_targets(targets, values):
    new_targets = targets.clone()

    # Changing targets to a classifiable number.
    for key, element in enumerate(values):
        new_targets[targets == element] = key
    return new_targets


# Kernel Alignment Fucntions

def kernel_calc(y, phi):

    # Output Kernel
    y = torch.t(torch.unsqueeze(y, -1))
    K1 = torch.matmul(torch.t(y), y)
    K1c = kernel_centering(K1.float())

    # Feature Kernel
    K2 = torch.mm(phi, torch.t(phi))
    K2c = kernel_centering(K2)

    return kernel_alignment(K1c, K2c)


def frobenius_product(K1, K2):
    return torch.trace(torch.mm(K2, torch.t(K1)))


def kernel_alignment(K1, K2):
    return frobenius_product(K1, K2) / ((torch.norm(K1, p='fro') * torch.norm(K2, p='fro')))


def kernel_centering(K):
    # Lemmna 1

    m = K.size()[0]
    I = torch.eye(m)
    l = torch.ones(m, 1)

    # I - ll^T / m
    mat = I - torch.matmul(l, torch.t(l)) / m

    return torch.matmul(torch.matmul(mat, K), mat)


def ones(vector):
    for i in range(vector.size()[1]):
        if vector[0][i] == 9:
            vector[0][i] = 1
        elif vector[0][i] == 8:
            vector[0][i] = -1
    return vector

Traing the model

In [3]:
# Checking & Setting Device Allocation
device = set_device()
print(f"Running on {device}")

# Hyper Parameters
hp = {
    "Input Size": 784,
    "Middle Layer Width": 2,
    "Num Classes": 2,
    "Regular Learning Rate": 0.01,
    "Slow Learning Rate": 0.001,
    "Batch Size": 10,
    "Epochs": 20
}
print(f"Hyper Parameters: {hp}")

# Initializing Model
slow_model = NN(input_size=hp["Input Size"],
                middle_width=hp["Middle Layer Width"],
                num_classes=hp["Num Classes"]).to(device=device)

reg_model = NN(input_size=hp["Input Size"],
                middle_width=hp["Middle Layer Width"],
                num_classes=hp["Num Classes"]).to(device=device)

# Loading MNIST Dataset
mnist_values = [2, 7]
print(f"MNIST digits {mnist_values}")
train_loader = mnist_dataset(hp["Batch Size"], values=mnist_values)
validate_loader = mnist_dataset(hp["Batch Size"], train=False, values=mnist_values)

# Loss function
loss_function = nn.MSELoss()
# Optimizers
sl_optimizer = optim.SGD([{'params': slow_model.features.hidden_layer.parameters()},
                            {'params': slow_model.readout.parameters(),
                            'lr': hp["Regular Learning Rate"]}],
                            lr=hp["Slow Learning Rate"])
r_optimizer = optim.SGD(reg_model.parameters(), lr=hp["Regular Learning Rate"])

# Creating 'empty' arrays for future storing of accuracy metrics
slow_accuracy = np.zeros((hp["Epochs"], 5))
regular_accuracy = np.zeros((hp["Epochs"], 5))

print("Training models...")
for epoch in range(hp["Epochs"]):

    # Slow Model
    sl_mean, sl_ste = train(train_loader, device, slow_model, loss_function, sl_optimizer, values=mnist_values)
    slow_accuracy[epoch][0] = epoch + 1
    slow_accuracy[epoch][1] = check_accuracy(device, slow_model, train_loader, mnist_values).cpu()
    slow_accuracy[epoch][2] = check_accuracy(device, slow_model, validate_loader, mnist_values).cpu()
    slow_accuracy[epoch][3] = sl_mean
    slow_accuracy[epoch][4] = sl_ste
    # CALCULATE THE K.A. AND RECORD IT TO A CSV (FOR SLOW MODEL)
    print("Slow: ")
    print(slow_accuracy[epoch])

    # Regular Model
    reg_mean, reg_ste = train(train_loader, device, reg_model, loss_function, r_optimizer, values=mnist_values)
    regular_accuracy[epoch][0] = epoch + 1
    regular_accuracy[epoch][1] = check_accuracy(device, reg_model, train_loader, mnist_values).cpu()
    regular_accuracy[epoch][2] = check_accuracy(device, reg_model, validate_loader, mnist_values).cpu()
    regular_accuracy[epoch][3] = reg_mean
    regular_accuracy[epoch][4] = sl_ste


    # CALCULATE THE K.A. AND RECORD IT TO A CSV (FOR REG MODEL)
    print("Reg: ")
    print(regular_accuracy[epoch])
    print(f"-Finished epoch {epoch + 1}/{hp['Epochs']}")

    # compute al. on both t and v.

# Accuracy csv
complete_array = np.concatenate((slow_accuracy, regular_accuracy), axis=1)
complete_dataframe = pd.DataFrame(complete_array).to_csv('../accuracy_metrics')
print(f"-Saved accuracy metrics as 'accuracy_metrics'")

# Saving the entire model
torch.save(slow_model.state_dict(), '../slow_model.pt')
print(f"-Saved Regular Model Parameters as 'slow_model.pt'")
torch.save(reg_model.state_dict(), '../reg_model.pt')
print(f"-Saved Regular Model Parameters as 'reg_model.pt'")

0.2%

Running on cpu
Hyper Parameters: {'Input Size': 784, 'Middle Layer Width': 2, 'Num Classes': 2, 'Regular Learning Rate': 0.01, 'Slow Learning Rate': 0.001, 'Batch Size': 10, 'Epochs': 20}
MNIST digits [2, 7]
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to dataset/MNIST/raw/train-images-idx3-ubyte.gz


100.0%


Extracting dataset/MNIST/raw/train-images-idx3-ubyte.gz to dataset/MNIST/raw


102.8%


Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to dataset/MNIST/raw/train-labels-idx1-ubyte.gz
Extracting dataset/MNIST/raw/train-labels-idx1-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw/t10k-images-idx3-ubyte.gz



100.0%


Extracting dataset/MNIST/raw/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz


112.7%


Extracting dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw



NameError: name 'optim' is not defined