In [1]:
import os
import time

import h5py
import numpy as np
import tensorflow as tf
from sklearn.decomposition import PCA
from tensorflow import keras
from tensorflow.keras.callbacks import TensorBoard

## Functions

In [2]:
def load_data(filename, dataname,
              y_filename='EP_preprocessed_y.h5',
              y_dataname='EP_preprocessed_y_dataset'):
    """Read a feature dataset and a label dataset from HDF5 files.

    Parameters
    ----------
    filename : str
        Path of the HDF5 file that holds the features.
    dataname : str
        Name of the dataset inside ``filename`` to read.
    y_filename : str, optional
        Path of the HDF5 file that holds the labels. Defaults to the
        label file this function previously had hard-coded.
    y_dataname : str, optional
        Name of the label dataset inside ``y_filename``. Defaults to the
        dataset name this function previously had hard-coded.

    Returns
    -------
    tuple
        ``(x, y)``: both datasets read completely (``[:]``) before their
        files are closed.
    """
    with h5py.File(filename, 'r') as hf:
        x = hf[dataname][:]
    with h5py.File(y_filename, 'r') as hf:
        y = hf[y_dataname][:]
    return x, y

In [3]:
def reshape_x(pre_x):
    """Collapse axes 1 and 2 of ``pre_x`` into a single feature axis.

    ``pre_x`` must expose ``.shape`` with at least three entries; the
    result has shape ``(shape[0], shape[1] * shape[2])``.
    """
    n_samples = pre_x.shape[0]
    n_features = pre_x.shape[1] * pre_x.shape[2]
    return np.reshape(pre_x, (n_samples, n_features))

In [4]:
def train_test_split(file, data, train_fraction=.80):
    """Load a dataset and split it sequentially into train and test parts.

    The samples are not shuffled: the first ``train_fraction`` of the rows
    become the training set and every remaining row becomes the test set.

    Parameters
    ----------
    file : str
        HDF5 file name forwarded to ``load_data``.
    data : str
        Dataset name forwarded to ``load_data``.
    train_fraction : float, optional
        Fraction of the rows placed in the training set (default 0.80).

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)``.

    Raises
    ------
    ValueError
        If the features and labels do not have the same number of rows.
    """
    x_, y_ = load_data(file, data)
    x_ = reshape_x(x_)

    if len(x_) != len(y_):
        raise ValueError(
            f"x has {len(x_)} samples but y has {len(y_)}; cannot split them together")

    # One shared boundary for both splits. Slicing the test set as
    # x_[-k:] breaks when k == 0 (x_[-0:] is the whole array) and, with two
    # independent roundings, can silently drop samples between the splits.
    split_at = int(len(x_) * train_fraction)

    x_train, x_test = x_[:split_at], x_[split_at:]
    y_train, y_test = y_[:split_at], y_[split_at:]

    print("X train shape: ", x_train.shape)
    print("X test shape: ", x_test.shape)
    print("Y train shape: ", y_train.shape)
    print("Y test shape: ", y_test.shape)

    return x_train, x_test, y_train, y_test

In [5]:
def get_PCA(n_component):
    """Create an unfitted ``PCA`` with ``n_components=n_component``."""
    return PCA(n_components=n_component)

In [6]:
def fit_pca(n_component, xtrain):
    """Fit a PCA on the given training features.

    Parameters
    ----------
    n_component : float or int
        Value forwarded to ``get_PCA``. The progress message below prints
        ``n_component * 100`` as a percentage, so it reads correctly only
        for fractional values.
    xtrain : array-like
        Training features the PCA is fitted on.

    Returns
    -------
    tuple
        ``(pca, fitted)`` where ``fitted`` is whatever ``pca.fit(xtrain)``
        returned (for scikit-learn estimators that is the estimator itself).
    """
    pca = get_PCA(n_component)
    # Fit on the argument, not on a notebook-level global, so the function
    # works on a fresh kernel and for any caller-supplied data.
    principal_components = pca.fit(xtrain)

    print("Number of Components given", str(n_component*100), "% retention:", len(pca.explained_variance_ratio_))
    return pca, principal_components

In [7]:
def pca_transform(n_component, xtrain, xtest):
    """Fit a PCA on ``xtrain`` and project both ``xtrain`` and ``xtest``.

    The PCA is obtained from ``fit_pca(n_component, xtrain)``; its
    ``explained_variance_ratio_`` (scaled by 100) is printed afterwards.

    Returns
    -------
    tuple
        ``(new_xtrain, new_xtest)``: the transformed inputs, in that order.
    """
    fitted_pca, _ = fit_pca(n_component, xtrain)

    # Training set is projected first, then the test set.
    projected = [fitted_pca.transform(split) for split in (xtrain, xtest)]

    print("Variance Ratio Matrix: \n", 100*fitted_pca.explained_variance_ratio_)
    return projected[0], projected[1]

In [8]:
def build_vanilla_model(layer, nodes, n_classes=10, dropout_rate=.5):
    """Build an uncompiled fully connected Keras classifier.

    The model is ``Flatten`` followed by ``layer - 1`` hidden blocks (each a
    ReLU ``Dense`` layer plus ``Dropout``) and a softmax output layer.

    Parameters
    ----------
    layer : int
        Total number of ``Dense`` layers, counting the output layer; the
        model therefore gets ``layer - 1`` hidden layers.
    nodes : int
        Number of units in every hidden ``Dense`` layer.
    n_classes : int, optional
        Number of units in the softmax output layer (default 10).
    dropout_rate : float, optional
        Rate passed to each ``Dropout`` layer (default 0.5).

    Returns
    -------
    keras.Sequential
        The assembled model; the caller is responsible for compiling it.
    """
    model = keras.Sequential()
    model.add(keras.layers.Flatten())

    for _ in range(layer - 1):
        model.add(keras.layers.Dense(nodes, activation="relu"))
        model.add(keras.layers.Dropout(dropout_rate))

    model.add(keras.layers.Dense(n_classes, activation="softmax"))
    return model

In [9]:
def get_tensorboard(model_name, logdir):
    """Create a TensorBoard callback that logs to ``<logdir>/<model_name>``.

    Parameters
    ----------
    model_name : str
        Run name; used as the sub-directory name and echoed to stdout.
    logdir : str
        Parent directory for the run's logs.

    Returns
    -------
    TensorBoard
        Callback configured with the joined log directory.
    """
    # os.path.join picks the platform's separator; a literal backslash is
    # only a path separator on Windows and becomes part of the directory
    # name everywhere else.
    tensorboard = TensorBoard(log_dir=os.path.join(logdir, model_name))
    print("Model Name:", model_name)
    return tensorboard

## Hyper-Parameter Search

In [10]:
# Hyperparameter grid: every combination of the three lists is trained.
dense_layers = [2, 3, 4]               # `layer` argument of build_vanilla_model
layer_sizes = [32, 64, 128]            # `nodes` argument of build_vanilla_model
n_pca_components = [0.80, 0.90, 0.99]  # `n_component` argument of pca_transform

In [12]:
# Data files and data sets. Each file "<stem>.h5" holds a dataset called
# "<stem>_dataset", so both lists are derived from one list of stems.
# To train on fewer files, shorten `dataset_stems` and `names` together.
dataset_stems = [
    "EP_preprocessed_x",
    "EP_spectral_20_upperBoundary_x",
    "EP_spectral_30_upperBoundary_x",
    "EP_spectral_50_upperBoundary_x",
    "EP_spectral_100_upperBoundary_x",
    "EP_cropped_Fourier_noFilter_x",
    "EP_cropped_Fourier_20Filter_x",
    "EP_cropped_Fourier_30Filter_x",
    "EP_cropped_Fourier_50Filter_x",
    "EP_cropped_Fourier_100Filter_x",
]

data_files = [f"{stem}.h5" for stem in dataset_stems]
data_sets = [f"{stem}_dataset" for stem in dataset_stems]

# Short run labels, index-aligned with data_files / data_sets.
names = [
    "none",
    "spec20", "spec30", "spec50", "spec100",
    "fft",
    "ffts20", "ffts30", "ffts50", "ffts100",
]

In [14]:
# Grid search: for every data file, PCA setting, network depth and layer
# width, train one dense network on the PCA-projected features.
for i in range(len(data_files)):
    data_file, data_set = data_files[i], data_sets[i]
    x_train, x_test, y_train, y_test = train_test_split(data_file, data_set)

    for n_pca_component in n_pca_components:

        # --- PCA ---
        # Fitted once per (file, PCA setting) and reused by every network below.
        pca_x_train, pca_x_test = pca_transform(n_pca_component, x_train, x_test)

        for dense_layer in dense_layers:
            for layer_size in layer_sizes:

                # --- Tensorboard Callback (optional) ---
                # To log each run, uncomment the two lines below and pass
                # callbacks=[tensorboard] to model.fit.
                #NAME = f"PCA-NN-{names[i]}-{n_pca_component}pca_components-{dense_layer}dense-{layer_size}nodes-{int(time.time())}"
                #tensorboard = get_tensorboard(model_name=NAME, logdir='logs')

                # Release the state Keras keeps for the previous model; without
                # this, memory use grows with every model built in this loop.
                keras.backend.clear_session()

                # --- VANILLA NEURAL NETWORK ---
                model = build_vanilla_model(dense_layer, layer_size)
                model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
                model.fit(pca_x_train, y_train, epochs=30, validation_data=(pca_x_test, y_test))

X train shape:  (51876, 3500)
X test shape:  (12969, 3500)
Y train shape:  (51876, 10)
Y test shape:  (12969, 10)
Number of Components given 80.0 % retention: 15
Variance Ratio Matrix: 
 [18.66070398  9.44766992  7.86317925  6.67424534  6.13749646  5.77551164
  4.86586549  4.06204693  3.76164186  3.47085056  2.7286422   2.46058642
  2.02203084  1.88011093  1.46422464]
Train on 51876 samples, validate on 12969 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
 7392/51876 [===>..........................] - ETA: 4s - loss: 2.3003 - accuracy: 0.1052

KeyboardInterrupt: 