## MSELoss Functionality Test

Network Class

In [1]:
import torch.nn as nn
from collections import OrderedDict

class NN(nn.Module):

    def __init__(self, input_size, middle_width, num_classes):


        super(NN, self).__init__()
        self.features = nn.Sequential(OrderedDict([
            ('hidden_layer', nn.Linear(input_size, middle_width)),
            ('hidden_activation', nn.ReLU()),
        ]))
        self.readout = nn.Linear(middle_width, num_classes)

    def forward(self, x):
        x = self.features(x)
        x = self.readout(x)

        return x

Network Functions

In [92]:
import torch  # Base torch library
from torch.utils.data import DataLoader  # Minibathces
import torchvision.datasets as datasets  # MNIST dataset
import torchvision.transforms as transforms
import numpy as np
from torch.nn.functional import one_hot


def set_device():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    return device


def mnist_dataset(batch_size, train=True, values=list(range(10))):
    # Initializing MNIST data set.
    dataset = datasets.MNIST(root='dataset/', train=train, transform=transforms.ToTensor(), download=True)

    targets_list = dataset.targets.tolist()
    values_index = [i for i in range(len(dataset)) if targets_list[i] in values]

    # Creating a subset of ### MNIST targets.
    subset = torch.utils.data.Subset(dataset, values_index)
    loader = DataLoader(dataset=subset, batch_size=batch_size, shuffle=True)

    return loader


def train(loader, device, model, loss_function, optimizer_function, values=list(range(10))):
    # Training on each data point.

    # Set array full of zeros.
    kernel_alignments = torch.zeros(len(loader))

    for batch_idx, (data, targets) in enumerate(loader):
        data = data.reshape(data.shape[0], -1).to(device=device)
        targets = targets[1].to(torch.float32).to(device=device)

        # Forwards.
        scores = model(data)
        # loss = loss_function(scores, classify_targets(targets, values))

        labels = one_hot(targets.long() % 10).to(torch.float32)
        print(f" Scores: {scores.size()} and {scores}")
        
        print(f" Labels: {labels.size()} and {labels}")
        print(labels)
        output = loss_function(scores, labels)

        # Backwards.
        optimizer_function.zero_grad()
        output.backward()

        optimizer_function.step()
        phi = model.features(data)
        print("PHI:")
        print(phi.size())
        print("TARGETS:" )
        print(targets.size())

        kernel_alignments[batch_idx] = kernel_calc(targets, phi)

    # return torch.mean(kernel_alignments).item(), torch.std(kernel_alignments).item()/len(kernel_alignments)
    # return mean and STD or STE of kernel alignment


def record_accuracy(device, model, train_loader, test_loader, epoch, ste, mean, values=list(range(10))):
    epoch_accuracy = np.array([[
        epoch + 1,
        check_accuracy(device, model, train_loader, values).cpu(),
        check_accuracy(device, model, test_loader, values).cpu(),
        mean,
        ste
    ]])

    return epoch_accuracy


def check_accuracy(device, model, loader, values=list(range(10))):
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = classify_targets(y, values).to(device=device)
            x = x.reshape(x.shape[0], -1)

            scores = model(x)
            # 64images x 10,

            predictions = scores.argmax(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

    return num_correct / num_samples


def classify_targets(targets, values):
    new_targets = targets.clone()

    # Changing targets to a classifiable number.
    for key, element in enumerate(values):
        new_targets[targets == element] = key
    return new_targets


# Kernel Alignment Fucntions

def kernel_calc(y, phi):

    # Output Kernel
    y = torch.t(torch.unsqueeze(y, -1))
    K1 = torch.matmul(torch.t(y), y)
    K1c = kernel_centering(K1.float())

    # Feature Kernel
    K2 = torch.mm(phi, torch.t(phi))
    K2c = kernel_centering(K2)

    return kernel_alignment(K1c, K2c)


def frobenius_product(K1, K2):
    return torch.trace(torch.mm(K2, torch.t(K1)))


def kernel_alignment(K1, K2):
    return frobenius_product(K1, K2) / ((torch.norm(K1, p='fro') * torch.norm(K2, p='fro')))


def kernel_centering(K):
    # Lemmna 1

    m = K.size()[0]
    I = torch.eye(m)
    l = torch.ones(m, 1)

    # I - ll^T / m
    mat = I - torch.matmul(l, torch.t(l)) / m

    return torch.matmul(torch.matmul(mat, K), mat)


def ones(vector):
    for i in range(vector.size()[1]):
        if vector[0][i] == 9:
            vector[0][i] = 1
        elif vector[0][i] == 8:
            vector[0][i] = -1
    return vector


Traing the model

In [3]:
# Setting device
device = set_device()

In [93]:
# Initializing the model
model = NN(784, 100, 10)

In [95]:
# Loading MNIST values
train_loader = mnist_dataset(7)
validate_loader = mnist_dataset(7)

MNIST digits [2, 7]


In [96]:
# Loss function
loss = nn.MSELoss() #!! REPLACE W/ MSELOSS

In [97]:
# Optimizer
import torch.optim as optim
optimizer = optim.SGD([{'params': model.features.hidden_layer.parameters()},
                          {'params': model.readout.parameters(),
                           'lr': 0.1}],
                         lr=0.01)

In [98]:
# Creating an empty array for storing accuracy metrics
import numpy as np
accuracy = np.zeros((5, 5))

In [99]:
train(train_loader, device, model, loss, optimizer)

 Scores: torch.Size([7, 10]) and tensor([[-0.0443, -0.0039,  0.0969, -0.0415,  0.0043, -0.0143, -0.0605,  0.1720,
          0.0308,  0.1102],
        [-0.1027, -0.0835,  0.0890,  0.0021, -0.0115, -0.0006, -0.0220,  0.0706,
          0.0942,  0.0448],
        [-0.0499,  0.0339,  0.0501,  0.0044,  0.0005, -0.0270, -0.0042,  0.0668,
         -0.0091,  0.1061],
        [-0.1128, -0.0676,  0.0688, -0.0627, -0.0384, -0.0238, -0.1321,  0.0815,
          0.0197,  0.0836],
        [-0.0815,  0.0172,  0.0584, -0.0229, -0.0379,  0.0453, -0.0857,  0.0100,
          0.0651,  0.0497],
        [-0.0423, -0.0007,  0.1351, -0.0120, -0.0552, -0.0140, -0.0430,  0.1131,
          0.0523,  0.1119],
        [-0.0478, -0.0577,  0.1366,  0.0322, -0.0430, -0.0667, -0.1478,  0.0271,
          0.0730,  0.0165]], grad_fn=<AddmmBackward0>)
 Labels: torch.Size([3]) and tensor([0., 0., 1.])
tensor([0., 0., 1.])


RuntimeError: The size of tensor a (10) must match the size of tensor b (3) at non-singleton dimension 1

In [12]:
# Accuracy csv
import pandas as pd
complete_dataframe = pd.DataFrame(accuracy).to_csv('accuracy_metrics')

Fixing MNSIT Dataset

In [61]:
def mnist_dataset(batch_size, train=True, values=list(range(10))):
    # Initializing MNIST data set.
    dataset = datasets.MNIST(root='dataset/', train=train, transform=transforms.ToTensor(), download=True)

    targets_list = dataset.targets.tolist()
    values_index = [i for i in range(len(dataset)) if targets_list[i] in values]

    # Creating a subset of ### MNIST targets.
    subset = torch.utils.data.Subset(dataset, values_index)
    loader = DataLoader(dataset=subset, batch_size=batch_size, shuffle=True)

    return subset

In [62]:
dataset = datasets.MNIST(root='dataset/', train=train, transform=transforms.ToTensor(), download=True)

In [65]:
values = list(range(0,10))
targets_list = dataset.targets.tolist()
values_index = [i for i in range(len(dataset)) if targets_list[i] in values]

In [67]:
# Creating a subset of ### MNIST targets.
batch_size = 200
subset = torch.utils.data.Subset(dataset, values_index)
loader = DataLoader(dataset=subset, batch_size=batch_size, shuffle=True)

In [28]:

import torchvision.transforms as transforms
import torchvision.datasets as datasets 
values = [1, 1]
batch_size = 12002
dataset = datasets.MNIST(root='dataset/', transform=transforms.ToTensor(), download=True)

targets_list = dataset.targets.tolist()
values_index = [i for i in range(len(dataset)) if targets_list[i] in values]

# Creating a subset of ### MNIST targets.
subset = torch.utils.data.Subset(dataset, values_index)
loader = DataLoader(dataset=subset, batch_size=batch_size, shuffle=True)


In [77]:
i = 0
for data, targets in enumerate(loader):
    if i == 0:
        t = targets
    i += 1

In [84]:
t[1].size()

torch.Size([200])

In [71]:
t.to(torch.float32)

tensor([6., 7., 8., 8., 7., 0., 4., 3., 0., 0., 7., 8., 7., 4., 9., 3., 6., 1.,
        6., 4., 1., 6., 1., 1., 8., 1., 7., 6., 4., 4., 9., 9., 7., 8., 2., 7.,
        1., 9., 6., 2., 9., 3., 1., 7., 6., 2., 3., 3., 0., 6., 9., 5., 9., 4.,
        4., 1., 2., 7., 0., 9., 6., 9., 2., 7., 1., 3., 8., 9., 0., 3., 3., 0.,
        4., 7., 8., 1., 1., 9., 8., 7., 2., 5., 8., 8., 9., 1., 7., 2., 1., 3.,
        8., 0., 0., 8., 6., 6., 7., 2., 1., 4., 2., 0., 2., 7., 8., 0., 1., 4.,
        1., 4., 9., 1., 5., 5., 5., 7., 6., 2., 5., 2., 7., 7., 4., 5., 0., 0.,
        2., 4., 4., 5., 5., 9., 7., 7., 2., 3., 8., 1., 6., 5., 7., 1., 0., 9.,
        1., 1., 3., 4., 6., 0., 0., 5., 0., 2., 9., 4., 9., 8., 7., 0., 2., 7.,
        6., 9., 0., 8., 2., 5., 6., 3., 6., 7., 0., 8., 8., 8., 5., 0., 9., 3.,
        8., 7., 8., 7., 3., 5., 9., 5., 1., 0., 1., 2., 9., 5., 9., 9., 5., 0.,
        9., 8.])

In [75]:
one_hot(t.long() % 10).size()

torch.Size([200, 10])

In [13]:

torch.randn(3,5)

tensor([[-0.8044, -0.4141, -0.7076, -1.2489, -0.2123],
        [-0.7767,  0.5124,  0.4282, -0.4937,  0.2222],
        [ 0.7859,  0.6405,  0.4175,  1.9157,  0.8038]])

In [12]:
torch.randn(3,5, requires_grad=True)

tensor([[-0.9557, -0.2617, -0.8787, -0.7586,  0.0471],
        [-0.8757, -0.7160, -0.1519,  1.9531, -0.3153],
        [-0.7677,  1.0559,  0.0542, -0.6688,  1.0281]], requires_grad=True)