In [1]:
import torch
from torch import Tensor

import dlc_practical_prologue as prologue

In [2]:
def nearest_classification(train_input, train_target, x):
    """Return the label of the training vector nearest to ``x``.

    train_input  -- 2d float tensor (n x d) holding the training vectors
    train_target -- 1d long tensor (n) holding the training labels
    x            -- 1d float tensor (d), the test vector

    Nearness is measured with the squared Euclidean distance; the square
    root is skipped because it does not change which sample is closest.
    """
    n_train = train_input.size(0)

    # x broadcasts against every row; summing over dim 1 yields one squared
    # distance per training sample, reshaped into an n x 1 column.
    sq_dist = torch.sum((train_input - x).pow(2), dim=1).view(n_train, -1)

    # Reducing over dim 0 gives (values, indices); only the index is needed.
    nearest = torch.min(sq_dist, dim=0)[1]

    return train_target[nearest[0]]
    

In [3]:
def compute_nb_errors(train_input, train_target, test_input, test_target, mean = None, proj = None):
    ''' Count the test samples misclassified by the nearest-neighbor rule.

    where
    • train_input is a 2d float tensor of dimension n × d containing the train vectors,
    • train_target is a 1d long tensor of dimension n containing the train labels,
    • test_input is a 2d float tensor of dimension m × d containing the test vectors,
    • test_target is a 1d long tensor of dimension m containing the test labels,
    • mean is either None or a 1d float tensor of dimension d,
    • proj is either None or a 2d float tensor of dimension c × d.

    If mean is given it is subtracted from both the train and the test vectors;
    if proj is given both sets are then multiplied by the transpose of proj.
    Each test vector is classified with nearest_classification and compared
    to its label.

    Returns the number of mismatches as a Python int. The tensors passed in
    by the caller are left unmodified.
    '''
    if mean is not None:
        # Out-of-place on purpose: `-=` would write into the caller's tensors,
        # so every further call would subtract the mean one more time.
        train_input = train_input - mean
        test_input = test_input - mean
    if proj is not None:
        # proj is c × d, so its transpose maps d-dimensional rows to c dimensions.
        train_input = train_input.mm(proj.t())
        test_input = test_input.mm(proj.t())

    nb_errors = 0
    for i in range(test_input.size(0)):
        predicted = nearest_classification(train_input, train_target, test_input[i])
        if test_target[i] != predicted:
            nb_errors += 1

    return nb_errors
    

In [4]:
def PCA(x):
    '''
    Compute the mean and the PCA basis of a set of vectors.

    where x is a 2d float tensor of dimension n × d, which returns a pair composed of the 1d mean vector
    of dimension d and the PCA basis, ranked in decreasing order of the eigen-values, as a 2d tensor of
    dimension d × d. Each row of the returned basis is one eigenvector.

    The sign of each eigenvector is arbitrary and may vary with the PyTorch version.
    '''
    # Mean vector
    mean = x.mean(0)

    # subtract mean
    matrix = x - mean

    # Covariance matrix up to a 1/n factor; the scaling is irrelevant here
    # because it changes neither the eigenvectors nor their ranking.
    sigma = matrix.t().mm(matrix)

    # sigma is symmetric, so a symmetric eigensolver applies: it returns real
    # eigenvalues and eigenvectors stored as columns. Tensor.eig, used here
    # previously, has been removed from PyTorch.
    eigh = getattr(getattr(torch, 'linalg', None), 'eigh', None)
    if eigh is not None:
        eigen_values, eigen_vectors = eigh(sigma)
    else:
        # Fallback for PyTorch versions that predate torch.linalg.eigh.
        eigen_values, eigen_vectors = torch.symeig(sigma, eigenvectors=True)

    # Indices that order the eigenvalues by decreasing magnitude.
    ranking = eigen_values.abs().sort(0, True)[1]
    # Transpose so eigenvectors become rows, then reorder the rows by rank.
    eigen_vectors = eigen_vectors.t()[ranking]

    return mean, eigen_vectors
    

In [10]:
# Run the nearest-neighbor experiment on MNIST (cifar=False), then on CIFAR (cifar=True).
for c in [False, True]:
    train_input, train_target, test_input, test_target = prologue.load_data(cifar=c)

    # Baseline: nearest neighbor on the raw vectors.
    nb_errors = compute_nb_errors(train_input, train_target, test_input, test_target)
    print('Baseline nb_errors {:d} error {:.02f}%'.format(nb_errors, 100 * nb_errors / test_input.size(0)))
    
    # Random projection: 100 directions drawn from a standard normal,
    # allocated with the same tensor type as the data.
    basis = train_input.new(100, train_input.size(1)).normal_()
    print (basis.size())
    
    nb_errors = compute_nb_errors(train_input, train_target, test_input, test_target, None, basis)
    print('Random {:d}d nb_errors {:d} error {:.02f}%'.format(basis.size(0), nb_errors, 100*nb_errors/test_input.size(0)))
    
    mean, PCA_basis = PCA(train_input)
    print('pca_basis: ', PCA_basis.size())
    
    for d in [100, 50, 10, 3]:
        # Keep only the first d rows of the basis. The slice goes into its own
        # name so PCA_basis stays complete and the loop does not depend on the
        # values of d being listed in decreasing order.
        basis_d = PCA_basis.narrow(0, 0, d)
        
        nb_errors = compute_nb_errors(train_input, train_target, test_input, test_target, mean, basis_d)
        print ('PCA {:d}d nb_errors {:d} error {:.02f}%'. format(d, nb_errors, 100*nb_errors/test_input.size(0)))
        

* Using MNIST
** Reduce the data-set (use --full for the full thing)
** Use 1000 train and 1000 test samples
Baseline nb_errors 172 error 17.20%
torch.Size([100, 784])
Random 100d nb_errors 198 error 19.80%
pca_basis:  torch.Size([784, 784])
PCA 100d nb_errors 164 error 16.40%
PCA 50d nb_errors 155 error 15.50%
PCA 10d nb_errors 214 error 21.40%
PCA 3d nb_errors 597 error 59.70%
* Using CIFAR
Files already downloaded and verified
Files already downloaded and verified
** Reduce the data-set (use --full for the full thing)
** Use 1000 train and 1000 test samples
Baseline nb_errors 746 error 74.60%
torch.Size([100, 3072])
Random 100d nb_errors 761 error 76.10%
pca_basis:  torch.Size([3072, 3072])
PCA 100d nb_errors 745 error 74.50%
PCA 50d nb_errors 737 error 73.70%
PCA 10d nb_errors 763 error 76.30%
PCA 3d nb_errors 839 error 83.90%
