In [7]:
import numpy as np
import tensorflow as tf
from tqdm.notebook import tqdm

In [5]:
def _filter_and_relabel(images_full, labels_full, transDict):
    """
    Keeps the samples whose label is a key of transDict and relabels them.

    Matching samples are collected in Python lists (np.append would copy the
    whole array on every call) and converted to numpy arrays once at the end.

    :param images_full: Indexable collection of images, aligned with labels_full.
    :param labels_full: Indexable collection of the original class labels.
    :param dict transDict: Maps each original class to keep to its new index.
    :return: tuple (images, labels) of numpy arrays holding only the kept samples;
             labels contains the new indices taken from transDict.
    """
    images = []
    labels = []
    for i in tqdm(range(len(labels_full))):
        label = labels_full[i]
        if label in transDict:
            labels.append(transDict[label])
            images.append(images_full[i])
    # dtype=int keeps an empty label list usable as an index array.
    return np.array(images), np.array(labels, dtype=int)


def _one_hot(labels, width):
    """
    Builds one row per label with a 1 in the column given by the label and 0 elsewhere.

    :param np.array labels: 1-D integer array of column indices.
    :param int width: Number of columns of the result.
    :return: float array of shape (len(labels), width).
    :raises IndexError: if a label is not a valid column index for width.
    """
    vectors = np.zeros((len(labels), width))
    vectors[np.arange(len(labels)), labels] = 1
    return vectors


def load_reduced_data(keepClasses: list, qubitCount):
    """
    Loads Fashion-MNIST via tf.keras and reduces it to the classes in keepClasses.

    Every kept class is relabelled with its position in keepClasses, and the
    labels are returned as vectors of length qubitCount with a 1 at that
    position and 0 elsewhere. Images are scaled by 1/255. The class mapping and
    a progress bar for each split are printed while loading.

    :param list keepClasses: List of ints, the original class labels to keep.
    :param int qubitCount: Length of each label vector. Must be large enough to
        index every kept class that actually occurs in the data.
    :return: returns two tuples, (train_images, train_labelsVektor) and
        (test_images, test_labelsVektor). The label arrays have shape
        (samples, qubitCount).
    :raises IndexError: if a kept sample's new label is >= qubitCount.
    """
    # Original class -> new label (its position in keepClasses).
    transDict = {cls: idx for idx, cls in enumerate(keepClasses)}
    print(transDict)

    (train_images_full, train_labels_full), (test_images_full, test_labels_full) = tf.keras.datasets.fashion_mnist.load_data()

    print("Loading train data")
    train_images, train_labels = _filter_and_relabel(train_images_full, train_labels_full, transDict)
    print("Loading test data")
    test_images, test_labels = _filter_and_relabel(test_images_full, test_labels_full, transDict)

    # Setup labels as vector outputs rather than digit value
    train_labelsVektor = _one_hot(train_labels, qubitCount)
    test_labelsVektor = _one_hot(test_labels, qubitCount)

    # Scale pixel values by 1/255.
    train_images = train_images / 255.0
    test_images = test_images / 255.0

    return (train_images, train_labelsVektor), (test_images, test_labelsVektor)


In [1]:
def nDimPCA(dimOut, data):
    """
    Fits a PCA on the training data and projects the data onto dimOut components.

    Every sample is flattened to a vector first, so data may have any number of
    axes after the leading sample axis, e.g. (samples, height, width) images or
    already flattened (samples, features) rows.

    :param int dimOut: Dimension of the output data
    :param np.array data: Training data; the first axis indexes the samples
    :return: returns a tuple (reduced, mu, VReduced): the training data in the
        reduced dimension with shape (samples, dimOut), the per-feature mean as
        a column vector of shape (features, 1), and the first dimOut rows of V
        from the SVD of the covariance matrix. mu and VReduced are the inputs
        PCAOnTest needs to project further data.
    """
    data = np.asarray(data)
    #Flatten input data: one row per sample
    flat = data.reshape(data.shape[0], int(np.prod(data.shape[1:])))
    #Mean of every feature, as a column vector
    mu = np.mean(flat, axis=0).reshape(flat.shape[1], 1)
    #Centre once and reuse for both the covariance and the projection
    centered = np.transpose(flat) - mu
    #covariance (np.cov returns a 0-d array for a single feature, so force 2-D)
    sigma = np.atleast_2d(np.cov(centered))
    #SVD
    _, _, V = np.linalg.svd(sigma)
    #V reduced
    VReduced = V[0:dimOut, 0:]
    #output reduced data
    out = np.dot(VReduced, centered)
    return np.transpose(out), mu, VReduced

def PCAOnTest(VReduced, mu, data):
    """
    Applies an already fitted PCA to test data

    Every sample is flattened to a vector first, so data may have any number of
    axes after the leading sample axis, e.g. (samples, height, width) images or
    already flattened (samples, features) rows.

    :param np.array VReduced: Eigenvectors for the PCA, one per row
    :param np.array mu: Vector containing the mean values, one per feature;
        a flat vector or a (features, 1) column are both accepted
    :param np.array data: Test data; the first axis indexes the samples
    :return: returns the reduced test data now in the same dimension as the training data,
        with shape (samples, number of rows of VReduced)
    """
    data = np.asarray(data)
    flat = data.reshape(data.shape[0], int(np.prod(data.shape[1:])))
    # Force a column vector so the mean is subtracted per feature; a 1-D mu
    # would otherwise broadcast along the sample axis.
    mu = np.reshape(mu, (np.size(mu), 1))
    out = np.dot(VReduced, np.transpose(flat) - mu)
    return np.transpose(out)

In [11]:
# Keep Fashion-MNIST classes 0-5; every label becomes a vector of length 10.
(train_images, train_labelsVektor), (test_images, test_labelsVektor) = load_reduced_data(
    keepClasses=[0, 1, 2, 3, 4, 5],
    qubitCount=10,
)

# Fit the PCA on the training images only, keeping 3 components ...
reduced_training_data, mean, VRed = nDimPCA(dimOut=3, data=train_images)
# ... and project the test images with the same mean and components.
test_images_red = PCAOnTest(VReduced=VRed, mu=mean, data=test_images)



{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5}
Loading train data


  0%|          | 0/60000 [00:00<?, ?it/s]

Loading test data


  0%|          | 0/10000 [00:00<?, ?it/s]