In [1]:
from tensorflow.python.client import device_lib
import os
import tensorflow as tf

# Set the environment variables
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Double check that you have the correct devices visible to TF
print("{0}\nThe available CPU/GPU devices on your system\n{0}".format('=' * 100))
print(device_lib.list_local_devices())

  from ._conv import register_converters as _register_converters


The available CPU/GPU devices on your system
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 1059698111660476798
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 2187788288
locality {
  bus_id: 1
}
incarnation: 11686142320101627393
physical_device_desc: "device: 0, name: GeForce GTX 970M, pci bus id: 0000:01:00.0, compute capability: 5.2"
]


In [2]:
import numpy as np
import input_data
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.initializers import RandomNormal
from keras.utils import np_utils
from keras import backend as K
from keras import optimizers
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from copy import deepcopy
#from imageset import ImageSet
K.set_image_dim_ordering('th')
import datetime
import pandas as pd

zfac = 0.0
epochs = 51
components_g = 30
iterations_g = 5
keep_g = False
samples = 10

Using TensorFlow backend.


In [3]:
import numpy as np
from sklearn.decomposition import PCA
from copy import deepcopy

epsilon = 0.001

class ImageSet():
    """ This a set of images. A method can dirty them up.
        A method can recover them from a pca model"""
    
    def __init__(self, X, y):
        self.X = X
        self.X_project = np.copy(X)
#        self.X_project = None
        self.y = y
        self.mean = None
        self.pca = None
        self.image_mask = []
        
#    @classmethod
    def dirty(self, zerofac):
        """ This method dirty's the images up by a factor of zerofac.
            If zerofac is 1 the entire image is set to zero. If it is
            0 the image is not changed at all."""
        points = np.shape(self.X)[1]
        for iimage in range(np.shape(self.X)[0]):
            raw_mask = np.random.ranf(points) + (0.5 - zerofac)
            mask = np.rint(raw_mask)
            self.X[iimage] = self.X[iimage] * mask
            """ This last modification is a tweak to note where points were set
                to zero for future recovery"""
            self.X[iimage] = self.X[iimage] + epsilon*(mask-1.0)
            image_mask = self.X[iimage] > -epsilon/2.0
            self.image_mask.append(image_mask)
        self.X_project = np.copy(self.X)
        
    def pca_calc(self, components):
        """ This method calculates the pca model using the using the
            the images in this Image_set """

        self.pca = PCA(n_components=components)
        self.pca.fit(self.X_project)
        
    def mean_calc(self):
        """ This method calculates the mean of the images over all images.
            It only calculates over the clean images so masks must be 
            calculated for each image."""
            
        image_count = np.zeros(np.shape(self.X)[1])
        image_sum = np.zeros(np.shape(self.X)[1])
        
        for iimage in range(np.shape(self.X)[0]):
            image = self.X[iimage]
#            image_mask_good = image > -0.00001
            image_mask = self.image_mask[iimage]
            image_count[image_mask] += 1
            image_sum[image_mask] += image[image_mask]
            
        self.mean = image_sum/image_count
        
    def recover_from_pca(self, pca, keep_orig = False):
        """ This method recovers images from a passed pca object.
            It puts the recovered image in X_project.
            keep_orig means to keep values at original points if
            true. If False overwrite with values from pca projection"""
#        for iimage in range(np.shape(self.X)[0]):
        for iimage in range(np.shape(self.X)[0]):
#            if (iimage % 10000 == 0):
#                print(iimage)
            image = self.X[iimage]
            image_prime = image - pca.mean_
#            image_mask_indicies = image < 0.0
            image_mask = np.invert(self.image_mask[iimage])
            eigen_vec = deepcopy(pca.components_)
            for i in range(pca.n_components_):
                eigen_vec[i][image_mask] = 0.0
            eigen_vec_transpose = eigen_vec.transpose()
            A = eigen_vec.dot(eigen_vec_transpose)
            b = np.zeros(pca.n_components_)
            for i in range(pca.n_components_):
                b[i] = image_prime.dot(eigen_vec[i])
            coeff = np.linalg.solve(A,b)
            image = np.zeros(np.shape(self.X)[1])
            
            for i in range(pca.n_components_):
                image += coeff[i] * pca.components_[i]
            image += pca.mean_
            
            if keep_orig:
                image[~image_mask] = self.X[iimage][~image_mask]
            
            self.X_project[iimage] = image
            
    def recover_from_pca_mean(self,pca):
        """ This method replaces missing values with values from the mean computed
           from a principle component analysis of clean images"""
           
        
        for iimage in range(np.shape(self.X)[0]):
            
            image = np.copy(self.X[iimage])
#            image_mask_indicies = image < 0.0
            image_mask = np.invert(self.image_mask[iimage])
            image[image_mask] = pca.mean_[image_mask]
            
            self.X_project[iimage] = image
            
    def recover_from_self_mean(self):
        " This method replaces missing values with values from own mean"""
#        image = np.zeros(np.shape(self.X)[1])
        
        for iimage in range(np.shape(self.X)[0]):
            image = np.copy(self.X[iimage])
#            image_mask_indicies = image < 0.0
            image_mask = np.invert(self.image_mask[iimage])
            image[image_mask] = self.mean[image_mask]
            self.X_project[iimage] = image
            
            
    def recover_from_self_pca(self, components, iterations, keep_orig = False):
        """ This method recovers images from it's own image set using
            iterative pca technique described in Everson and Sirovich"""
            
        """np.save("self_pca_X_b", self.X[0])
        np.save("self_pca_X_project_b", self.X_project[0])"""
        self.recover_from_self_mean()
        
        
        for _ in range(iterations-1):
            print("doing iteration")
            self.pca_calc(components = components)
            self.recover_from_pca(self.pca, keep_orig=False)
#            sys.exit("Error message")

        self.pca_calc(components = components)
        self.recover_from_pca(self.pca, keep_orig=keep_orig)  
        
        """np.save("self_pca_X_a", self.X[0])
        np.save("self_pca_X_project_a", self.X_project[0])
        sys.exit()"""
        

In [4]:
def larger_model():
    
    # create model
    model = Sequential()
    model.add(Conv2D(32, (5, 5), input_shape=(1, 28, 28), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(64, (5, 5), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    
    model.add(Flatten())
    model.add(Dense(1024, activation='relu'))
     
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))
    
    # Compile model
    adam = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
    model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
    
    return model

In [5]:
def run_example(restore = None, zerofac = 0.0, model = None, components = 0, 
                keep = False, iterations = 3):
    """ The run_example runs one example and returns the error. 
        The main program then does statistics on the errors"""
        

    data = input_data.read_data_sets("MNIST_data/", one_hot=True)
    X_train_d, y_train_d = data.train.next_batch(50000)
    X_test_d, y_test_d = data.test.next_batch(11000)
    np.save("out_train_X", X_train_d)
    np.save("out_train_y", y_train_d)
    np.save("out_test_X", X_test_d)
    np.save("out_test_y", y_test_d) 

    """X_train_d = np.load("out_train_X.npy")
    y_train_d = np.load("out_train_y.npy")
    X_test_d = np.load("out_test_X.npy")
    y_test_d = np.load("out_test_y.npy")"""

    N_clean = 5000
    N_images = X_train_d.shape[0]


    clean_images = ImageSet(np.copy(X_train_d[0:N_clean]), np.copy(y_train_d[0:N_clean]))
    clean_images.pca_calc(components)
    
#    train_X = np.copy(X_train_d[N_clean:N_images])
#    train_y = np.copy(y_train_d[N_clean:N_images])
#    dirty_train = ImageSet(train_X, train_y)

    dirty_train = ImageSet(np.copy(X_train_d[N_clean:N_images]), np.copy(y_train_d[N_clean:N_images]))
    dirty_test = ImageSet(np.copy(X_test_d), np.copy(y_test_d))
    
#    dirty_train = ImageSet(X_train_d[N_clean:N_images], y_train_d[N_clean:N_images])
#    dirty_test = ImageSet(X_test_d, y_test_d)

    
    if restore == None:
        pass
    else:
        dirty_train.dirty(zerofac)
        dirty_train.mean_calc()
        dirty_test.dirty(zerofac)
        dirty_test.mean_calc()
    if restore == 'pca_mean':
        dirty_train.recover_from_pca_mean(clean_images.pca)
        dirty_test.recover_from_pca_mean(clean_images.pca)
    elif restore == 'self_mean':
        dirty_train.recover_from_self_mean()
        dirty_test.recover_from_self_mean()
    elif restore == 'pca':
        with tf.device('/cpu:0'):
            dirty_train.recover_from_pca(clean_images.pca, keep_orig=keep)
            dirty_test.recover_from_pca(clean_images.pca, keep_orig=keep)
    elif restore == 'self_pca':
        with tf.device('/cpu:0'): 
            dirty_train.recover_from_self_pca(components = components, iterations=iterations, keep_orig=True)
            dirty_train.pca_calc(components)
            dirty_test.mean_calc()
            dirty_train.pca.mean_ = dirty_test.mean
            dirty_test.recover_from_pca(dirty_train.pca, keep_orig=True)
    elif restore == 'compress':
        with tf.device('/cpu:0'):
            pca = PCA(n_components=components)
            pca.fit(clean_images.X)
            coeffs = pca.transform(dirty_train.X_project)
            dirty_train.X_project = pca.inverse_transform(coeffs)
            coeffs = pca.transform(dirty_test.X_project)
            dirty_test.X_project = pca.inverse_transform(coeffs)
            diff = np.sum(np.absolute(dirty_train.X - dirty_train.X_project))
            mag = np.sum(np.absolute(dirty_train.X))
            print("difference is ", diff, mag, "with components", components)

    X_dirty_train = dirty_train.X_project
    X_dirty_test = dirty_test.X_project

    X_train = X_dirty_train.reshape(X_dirty_train.shape[0], 1, 28, 28).astype('float32')
    X_test = X_dirty_test.reshape(X_dirty_test.shape[0], 1, 28, 28).astype('float32')
# normalize inputs from 0-255 to 0-1
    X_train = X_train / 255
    X_test = X_test / 255

    y_train = dirty_train.y
    y_test = dirty_test.y

# build the model
    model = larger_model()
# Fit the model
    with tf.device('/gpu:0'):
        history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=epochs, batch_size=100, verbose=0)
# Final evaluation of the model
    scores = model.evaluate(X_test, y_test, verbose=0)
    print("Large CNN Error: %.2f%%" % (100-scores[1]*100))
    
    if K.backend() == 'tensorflow':
        K.clear_session()
    del dirty_train
    del dirty_test
    del clean_images
    
    return (100-scores[1]*100)


In [6]:
start = datetime.datetime.now()

#model = larger_model()
#wsave = model.get_weights()

error = 100
clean_errors = np.zeros(samples)
for i in range(samples):
    print("doing clean on sample ",i)
#    model.set_weights(wsave)
#    error = run_example(restore = None, zerofac = zfac)
    clean_errors[i] = error

pca_mean_errors = np.zeros(samples)
for i in range(samples):
    print("doing pca_mean on sample ",i)
#    model.set_weights(wsave)
#    error = run_example(restore = 'pca_mean', zerofac = zfac)
    pca_mean_errors[i] = error
print('the mean for pca_mean is %5.3f with standard deviation of %5.3f', 
      (np.mean(pca_mean_errors), np.std(pca_mean_errors)))

    
self_mean_errors = np.zeros(samples)
for i in range(samples):
    print("doing self_mean on sample ",i)
#    model.set_weights(wsave)
#    error = run_example(restore = 'self_mean', zerofac = zfac)
    self_mean_errors[i] = error
print('the mean for self_mean is %5.3f with standard deviation of %5.3f', 
      (np.mean(self_mean_errors), np.std(self_mean_errors)))
    
keep = True
components = 100
pca_errors = np.zeros(samples)
for i in range(samples):
    print("doing pca on sample ",i)
#    error = run_example(restore = 'pca', zerofac = zfac, components = components, keep = keep)
    pca_errors[i] = error
print('the mean with %s and %d components is %5.3f with standard deviation of %5.3f', (keep, components, np.mean(pca_errors), np.std(pca_errors)))

keep = True
iterations = 4
components = 100
self_pca_errors = np.zeros(samples)
for i in range(samples):
    print("doing self_pca on sample ",i)
#    model.set_weights(wsave)
#    error = run_example(restore = 'self_pca', zerofac = zfac, components = components, keep = keep, iterations=iterations)
    self_pca_errors[i] = error
print('the mean with %s and %d components is %5.3f with standard deviation of %5.3f', (keep, components, np.mean(self_pca_errors), np.std(self_pca_errors)))

components = 100
self_pca_errors = np.zeros(samples)
for i in range(samples):
    print("doing self_pca on sample ",i)
#    model.set_weights(wsave)
    error = run_example(restore = 'compress', zerofac = zfac, components = components, keep = keep, iterations=iterations)
    self_pca_errors[i] = error
print('the mean with %s and %d components is %5.3f with standard deviation of %5.3f', (keep, components, np.mean(self_pca_errors), np.std(self_pca_errors)))

components = 200
self_pca_errors = np.zeros(samples)
for i in range(samples):
    print("doing self_pca on sample ",i)
#    model.set_weights(wsave)
    error = run_example(restore = 'compress', zerofac = zfac, components = components, keep = keep, iterations=iterations)
    self_pca_errors[i] = error
print('the mean with %s and %d components is %5.3f with standard deviation of %5.3f', (keep, components, np.mean(self_pca_errors), np.std(self_pca_errors)))

components = 300
self_pca_errors = np.zeros(samples)
for i in range(samples):
    print("doing self_pca on sample ",i)
#    model.set_weights(wsave)
    error = run_example(restore = 'compress', zerofac = zfac, components = components, keep = keep, iterations=iterations)
    self_pca_errors[i] = error
print('the mean with %s and %d components is %5.3f with standard deviation of %5.3f', (keep, components, np.mean(self_pca_errors), np.std(self_pca_errors)))

components = 400
self_pca_errors = np.zeros(samples)
for i in range(samples):
    print("doing self_pca on sample ",i)
#    model.set_weights(wsave)
    error = run_example(restore = 'compress', zerofac = zfac, components = components, keep = keep, iterations=iterations)
    self_pca_errors[i] = error
print('the mean with %s and %d components is %5.3f with standard deviation of %5.3f', (keep, components, np.mean(self_pca_errors), np.std(self_pca_errors)))

components = 500
self_pca_errors = np.zeros(samples)
for i in range(samples):
    print("doing self_pca on sample ",i)
#    model.set_weights(wsave)
    error = run_example(restore = 'compress', zerofac = zfac, components = components, keep = keep, iterations=iterations)
    self_pca_errors[i] = error
print('the mean with %s and %d components is %5.3f with standard deviation of %5.3f', (keep, components, np.mean(self_pca_errors), np.std(self_pca_errors)))

components = 600
self_pca_errors = np.zeros(samples)
for i in range(samples):
    print("doing self_pca on sample ",i)
#    model.set_weights(wsave)
    error = run_example(restore = 'compress', zerofac = zfac, components = components, keep = keep, iterations=iterations)
    self_pca_errors[i] = error
print('the mean with %s and %d components is %5.3f with standard deviation of %5.3f', (keep, components, np.mean(self_pca_errors), np.std(self_pca_errors)))

components = 700
self_pca_errors = np.zeros(samples)
for i in range(samples):
    print("doing self_pca on sample ",i)
#    model.set_weights(wsave)
    error = run_example(restore = 'compress', zerofac = zfac, components = components, keep = keep, iterations=iterations)
    self_pca_errors[i] = error
print('the mean with %s and %d components is %5.3f with standard deviation of %5.3f', (keep, components, np.mean(self_pca_errors), np.std(self_pca_errors)))

components = 784
self_pca_errors = np.zeros(samples)
for i in range(samples):
    print("doing self_pca on sample ",i)
#    model.set_weights(wsave)
    error = run_example(restore = 'compress', zerofac = zfac, components = components, keep = keep, iterations=iterations)
    self_pca_errors[i] = error
print('the mean with %s and %d components is %5.3f with standard deviation of %5.3f', (keep, components, np.mean(self_pca_errors), np.std(self_pca_errors)))

end = datetime.datetime.now()
print("The job took ", end-start)


""" Now output the result """
outfile = 'Result/result'
outfile += ('_zfac'+str(zfac))
outfile += ('_epochs'+str(epochs))
outfile += ('_comps'+str(components))
outfile += ('_iters'+str(iterations))

#np.savez(outfile,clean_errors,pca_mean_errors,self_mean_errors, pca_errors, self_pca_errors)

initial = [[None for _ in range(samples+3)] for _ in range(6)]
col_array = ["" for x in range(samples+2)]
for col in range(samples):
    col_array[col] = 'sample '+str(col)
col_array[samples] = 'mean'
col_array[samples+1] = 'standard deviation'
    
index_array = ["" for x in range(5)]

index_array[0] = "clean_errors"
#clean_errors = np.random.randint(10,size=samples)
for i in range(samples):
    initial[1][i+1] = clean_errors[i]
initial[1][samples+1] = np.mean(clean_errors)
initial[1][samples+2] = np.std(clean_errors)
    
index_array[1] = "pca_mean_errors"
#pca_mean_errors = np.random.randint(10,size=samples)
for i in range(samples):
    initial[2][i+1] = pca_mean_errors[i]
initial[2][samples+1] = np.mean(pca_mean_errors)
initial[2][samples+2] = np.std(pca_mean_errors)
    
index_array[2] = "self_mean_errors"
#self_mean_errors = np.random.randint(10,size=samples)
for i in range(samples):
    initial[3][i+1] = self_mean_errors[i]
initial[3][samples+1] = np.mean(self_mean_errors)
initial[3][samples+2] = np.std(self_mean_errors)

index_array[3] = "pca_errors"
#pca_errors = np.random.randint(10,size=samples)
for i in range(samples):
    initial[4][i+1] = pca_errors[i]
initial[4][samples+1] = np.mean(pca_errors)
initial[4][samples+2] = np.std(pca_errors)
    
index_array[4] = "self_pca_errors"
#self_pca_errors = np.random.randint(10,size=samples)
for i in range(samples):
    initial[5][i+1] = self_pca_errors[i]
initial[5][samples+1] = np.mean(self_pca_errors)
initial[5][samples+2] = np.std(self_pca_errors)

data3_np = np.array(initial)

data3_df = pd.DataFrame(data=data3_np[1:,1:],
                        index=np.array(index_array),
                        columns=np.array(col_array))
outfile += '.csv'

data3_df.to_csv(outfile, sep=',')

('doing clean on sample ', 0)
('doing clean on sample ', 1)
('doing clean on sample ', 2)
('doing clean on sample ', 3)
('doing clean on sample ', 4)
('doing clean on sample ', 5)
('doing clean on sample ', 6)
('doing clean on sample ', 7)
('doing clean on sample ', 8)
('doing clean on sample ', 9)
('doing pca_mean on sample ', 0)
('doing pca_mean on sample ', 1)
('doing pca_mean on sample ', 2)
('doing pca_mean on sample ', 3)
('doing pca_mean on sample ', 4)
('doing pca_mean on sample ', 5)
('doing pca_mean on sample ', 6)
('doing pca_mean on sample ', 7)
('doing pca_mean on sample ', 8)
('doing pca_mean on sample ', 9)
('the mean for pca_mean is %5.3f with standard deviation of %5.3f', (100.0, 0.0))
('doing self_mean on sample ', 0)
('doing self_mean on sample ', 1)
('doing self_mean on sample ', 2)
('doing self_mean on sample ', 3)
('doing self_mean on sample ', 4)
('doing self_mean on sample ', 5)
('doing self_mean on sample ', 6)
('doing self_mean on sample ', 7)
('doing self_mea

('difference is ', 957865.8, 4613154.5, 'with components', 200)
Large CNN Error: 1.38%
('the mean with %s and %d components is %5.3f with standard deviation of %5.3f', (True, 200, 1.3445454549789446, 0.056838178237022016))
('doing self_pca on sample ', 0)
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
('difference is ', 616227.8, 4613081.0, 'with components', 300)
Large CNN Error: 1.47%
('doing self_pca on sample ', 1)
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
('difference is ', 613612.1, 4604251.0, 'with components', 300)
Large CNN Error: 1.46%
('doing self_pca on sample ', 2)
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_d

('difference is ', 76931.51, 4611804.0, 'with components', 500)
Large CNN Error: 1.29%
('doing self_pca on sample ', 5)
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
('difference is ', 76683.8, 4611040.0, 'with components', 500)
Large CNN Error: 1.32%
('doing self_pca on sample ', 6)
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
('difference is ', 74909.56, 4610224.5, 'with components', 500)
Large CNN Error: 1.39%
('doing self_pca on sample ', 7)
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
('difference is ', 75722.93, 4612223.5, 'with components', 

Large CNN Error: 1.33%
('the mean with %s and %d components is %5.3f with standard deviation of %5.3f', (True, 700, 1.3354545463215246, 0.06747512575080133))
('doing self_pca on sample ', 0)
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
('difference is ', 6.5539985, 4609081.0, 'with components', 784)
Large CNN Error: 1.34%
('doing self_pca on sample ', 1)
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
('difference is ', 6.4689403, 4609177.0, 'with components', 784)
Large CNN Error: 1.35%
('doing self_pca on sample ', 2)
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels