In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import Adam
from keras.optimizers import SGD
from keras.utils.np_utils import to_categorical  
import numpy as np
from keras.datasets import cifar100
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from sklearn.decomposition import PCA


# centre the data
def centre_data(train, validation, test):
    """Centre all three splits on the per-column means of the training split.

    The means are computed from ``train`` only and then subtracted from every
    split, so validation and test data are shifted by the training statistics.
    Each array is updated in place (and also returned).

    Args:
        train: 2-D array of training samples (rows) by attributes (columns).
        validation: array of validation samples with the same columns.
        test: array of testing samples with the same columns.

    Returns:
        Tuple ``(train, test, validation)``. Note that this order differs
        from the parameter order ``(train, validation, test)``.
    """
    # calculate the means for each attribute of the training data
    column_means = np.mean(train, axis=0)

    # One broadcast subtraction per split replaces the row-by-row loops.
    # Slice assignment writes back into the caller's arrays.
    for split in (train, test, validation):
        if len(split):  # an empty split has nothing to centre
            split[:] = split - column_means

    return train, test, validation


def autoencoder(training_data, testing_data):
    """Train a single-hidden-layer autoencoder and return the encoded test data.

    The network is ``input -> Dense(32, relu) -> Dense(n_features, sigmoid)``
    and is trained to reconstruct ``training_data``; ``testing_data`` is used
    as the validation set during fitting.

    Args:
        training_data: 2-D array of flattened samples, used as both the input
            and the reconstruction target.
        testing_data: 2-D array with the same number of columns.

    Returns:
        The encoder's output for ``testing_data`` (one 32-wide code per row).
    """
    # Imported locally: the bare name `keras` is not imported before this
    # definition, so referencing it without this import raises NameError.
    import keras
    from keras import layers

    dimension = 32  # width of the bottleneck (encoded) representation
    # Derive the input width from the data instead of hard-coding 3072.
    n_features = training_data.shape[1]
    input_image = keras.Input(shape=(n_features,))

    encoded = layers.Dense(dimension, activation='relu')(input_image)
    decoded = layers.Dense(n_features, activation='sigmoid')(encoded)

    # `model` maps input -> reconstruction; `encoder` shares its first layer,
    # so fitting `model` also trains the encoder.
    model = keras.Model(input_image, decoded)
    encoder = keras.Model(input_image, encoded)

    # NOTE(review): sigmoid outputs with binary cross-entropy presumably
    # expect inputs scaled to [0, 1] -- confirm that callers scale the data.
    model.compile(optimizer='adam', loss='binary_crossentropy')

    model.fit(training_data, training_data,
              epochs=50,
              batch_size=256,
              shuffle=True,
              validation_data=(testing_data, testing_data))

    return encoder.predict(testing_data)


# apply PCA on the data
def PCA(variance_target, training_data, validation_data, testing_data):
    """Project all three splits onto leading right-singular vectors of the training data.

    The directions come from an SVD of ``training_data``. Directions are kept,
    in the order returned by the SVD, until the running share of squared
    singular values reaches ``variance_target``.

    Note: this definition shares its name with the ``PCA`` class imported
    from ``sklearn.decomposition`` at the top of the file and replaces it.

    Args:
        variance_target: fraction of the total squared singular values to retain.
        training_data: 2-D array used to compute the directions.
        validation_data: 2-D array with the same columns.
        testing_data: 2-D array with the same columns.

    Returns:
        Tuple ``(training, testing, validation)`` of projected arrays. Note
        that this order differs from the parameter order.
    """
    _, singular_values, Vt = np.linalg.svd(training_data, full_matrices=False)

    squared = singular_values**2
    ratios = squared/np.sum(squared)

    # determine how many principal components must be retained to maintain
    # the target level of explained variance
    n_components = 0
    explained_variance = 0
    for ratio in ratios:
        if explained_variance >= variance_target:
            break
        n_components += 1
        explained_variance += ratio

    # The same projection matrix is applied to every split.
    projection = Vt.T[:, :n_components]
    projected_training = training_data.dot(projection)
    projected_testing = testing_data.dot(projection)
    projected_validation = validation_data.dot(projection)
    return projected_training, projected_testing, projected_validation


def load_in_dataset_and_preprocess(explained_variance):
    """Load CIFAR-100 (coarse labels), split, centre and PCA-project it.

    Steps: flatten each image to 3072 values, hold out the last 1000 training
    samples as a validation split, cast the data to float32, centre every
    split with ``centre_data`` and reduce dimensionality with ``PCA``.

    Args:
        explained_variance: variance target forwarded to ``PCA``.

    Returns:
        Tuple ``(training_data, training_labels, testing_data,
        testing_labels, validation_data, validation_labels)``. Training and
        validation labels are squeezed; testing labels are returned as loaded.
    """
    (train_x, train_y), (test_x, test_y) = (cifar100.load_data("coarse"))

    # flatten every image into a single row
    train_x = train_x.reshape(50000, 3072)
    test_x = test_x.reshape(10000, 3072)

    # the last 1000 training samples become the validation split
    split_at = 49000
    val_x = train_x[split_at:, :]
    val_y = np.squeeze(train_y[split_at:, :])
    train_x = train_x[:split_at, :]
    train_y = np.squeeze(train_y[:split_at, :])

    train_x = train_x.astype('float32')
    test_x = test_x.astype('float32')
    val_x = val_x.astype('float32')

    # centre_data takes (train, validation, test) but returns (train, test, validation)
    train_x, test_x, val_x = centre_data(train_x, val_x, test_x)

    # PCA follows the same argument/return ordering as centre_data
    train_x, test_x, val_x = PCA(explained_variance, train_x, val_x, test_x)

    return train_x, train_y, test_x, test_y, val_x, val_y

training_data, training_labels, testing_data, testing_labels, validation_data, validation_labels = load_in_dataset_and_preprocess(0.8)


# Rescale every split by the same constant (255, the raw pixel range).
# NOTE: the data has already been centred and PCA-projected at this point, so
# this is a uniform rescaling of the features, not a mapping to [0, 1].
training_data = training_data / 255
testing_data = testing_data / 255
validation_data = validation_data / 255

from keras.optimizers import Adam
from keras.layers import Dense, Activation, Dropout

# The dataset is loaded with "coarse" labels, i.e. 20 superclasses, so the
# softmax layer needs 20 outputs rather than 100.
num_classes = 20

model = Sequential()

model.add(Dense(256, activation='relu', input_dim=training_data.shape[1])) #training_data
model.add(Dropout(0.5))
model.add(Dense(256,activation='sigmoid'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))


# Alternative optimiser kept for experimentation; pass `sgd` to compile() to try it.
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

# can also use loss function categorical_crossentropy (with one-hot labels)
# try with different optimisers and loss functions

# Pass the configured optimiser object: the string 'adam' would build a
# default Adam and silently ignore the settings below.
adam = Adam(lr=0.001, decay=1e-6)
model.compile(optimizer=adam, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Validate on the dedicated held-out split rather than carving a further 20%
# off the training data.
history = model.fit(training_data, training_labels, epochs=50, batch_size=32, verbose=2,
                    validation_data=(validation_data, validation_labels))
score = model.evaluate(testing_data, testing_labels, batch_size=128, verbose=0)

print(model.metrics_names)
print(score)

**ZCA: Image Whitening**

In [None]:

def zca_whitening_matrix(matrix, epsilon=1e-5):
    """Return the ZCA whitening matrix for ``matrix``.

    Each row of ``matrix`` is treated as a variable and each column as an
    observation (``rowvar=True``). The result is
    ``U @ diag(1 / sqrt(S + epsilon)) @ U.T``, where ``U`` and ``S`` come from
    the SVD of the covariance matrix.

    Args:
        matrix: array of shape (variables, observations).
        epsilon: small constant added to the singular values before the
            inverse square root, to avoid dividing by zero.

    Returns:
        Square array with one row and column per variable.
    """
    # np.cov takes `rowvar`, not `row`; the old keyword raised TypeError.
    # atleast_2d covers a single variable, where np.cov returns a 0-d array.
    sigma = np.atleast_2d(np.cov(matrix, rowvar=True))
    U, S, _ = np.linalg.svd(sigma)

    # Multiplying U by a 1-D vector scales column j by scale[j], which equals
    # U @ diag(scale) without building the diagonal matrix.
    scale = 1.0 / np.sqrt(S + epsilon)
    result = np.dot(U * scale, U.T)

    return result

**Feature Extraction with Autoencoders**

In [None]:
import keras
from keras import layers
from keras.datasets import cifar100
import numpy as np

dimension = 32  # width of the encoded (bottleneck) representation
input_image = keras.Input(shape=(3072,))

encoded = layers.Dense(dimension, activation='relu')(input_image)
decoded = layers.Dense(3072, activation='sigmoid')(encoded)

# NOTE: this rebinds the module-level name `autoencoder`, which is also the
# name of a function defined earlier in this file.
autoencoder = keras.Model(input_image, decoded)
encoder = keras.Model(input_image, encoded)

# Stand-alone decoder: reuse the autoencoder's output layer on a fresh
# code-sized input. The layer is bound to its own name so the imported
# `layers` module is not overwritten (which would break `layers.Dense` if
# this cell were run again).
input_encode = keras.Input(shape=(dimension,))
decoder_layer = autoencoder.layers[-1]

decoder = keras.Model(input_encode, decoder_layer(input_encode))
autoencoder.compile(optimizer='adam', loss='binary_crossentropy')

(training_data, _), (testing_data, _) = (cifar100.load_data("coarse"))

# Scale pixels to [0, 1] and flatten every image into a single row.
training_data = training_data.astype('float32') / 255.
testing_data = testing_data.astype('float32') / 255.
training_data = training_data.reshape((len(training_data), np.prod(training_data.shape[1:])))
testing_data = testing_data.reshape((len(testing_data), np.prod(testing_data.shape[1:])))

# The autoencoder is trained to reconstruct its own input.
autoencoder.fit(training_data, training_data,
                epochs=50,
                batch_size=256,
                shuffle=True,
                validation_data=(testing_data, testing_data))

output_encoded = encoder.predict(testing_data)
print(output_encoded)