In [None]:
import pandas as pd
import numpy as np
from scipy import stats as stats
import matplotlib.pyplot as plt
import seaborn as sb
import KeyFunctions as me
import tensorflow as tf
import os, sys

from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv1D, MaxPooling1D, Flatten, MaxPooling2D
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

from sympy.discrete.transforms import fwht, ifwht
from multiprocessing import Pool
import time
from contextlib import closing

#Seed handed to the cross-validation splitter as random_state
RandState = 117

#From here on everything written with print() goes to R6gResult.txt instead of the
#notebook output. The handle stays bound to `f` and is left open, so stdout remains
#redirected for the rest of the session.
sys.stdout = f = open('R6gResult.txt', 'w')

In [None]:
#Load the combined dataset. Later cells use df.index as the class label of each row
#and len(labels) as the number of output classes.
df, labels = me.ConstructCombinedDataset()

#Shuffled, stratified splitter (scikit-learn's default number of splits), seeded for
#reproducibility.
KFold = StratifiedKFold(shuffle = True, random_state = RandState)

#split() returns a one-shot generator. Materialise it so the training cell can be
#re-run (or the folds inspected) without silently iterating over nothing.
Folds = list(KFold.split(df, df.index))

In [None]:
#Per-fold result accumulators: seven independent lists per input representation.
#Prefix: SC = scaled spectra, FT = Fourier transform, HT = Hadamard transform.
#Note on the suffixes: the training cell appends the test-set evaluation to the
#*tnacc/*tnloss lists and the final-epoch training values to *ttacc/*ttloss;
#*vlacc/*vlloss hold validation values and *regerr the me.Scorer result.

#Scaled spectra
SCtnacc, SCtnloss, SCttacc, SCttloss, SCvlacc, SCvlloss, SCregerr = ([] for _ in range(7))

#Fourier-transformed spectra
FTtnacc, FTtnloss, FTttacc, FTttloss, FTvlacc, FTvlloss, FTregerr = ([] for _ in range(7))

#Hadamard-transformed spectra
HTtnacc, HTtnloss, HTttacc, HTttloss, HTvlacc, HTvlloss, HTregerr = ([] for _ in range(7))


In [None]:
#Train and evaluate the CNN on the Full R6g Dataset: once per fold and per input
#representation (scaled spectra, Fourier transform, Walsh-Hadamard transform).

#Set Training Parameters (identical for every fold and every representation)
verbose = 0
epochsvec = [5, 20, 50]
batch_sizevec = [10, 50, 100]
epochs = epochsvec[1]
batch_size = batch_sizevec[1]

#Column titles printed above every per-fold result row
RESULT_HEADER = ('Test Acc', 'Test Loss', 'Train Acc', 'Train Loss', 'Val_Acc', 'Val_Loss', 'Regression Error')


def _build_cnn(n_timesteps, n_features, n_outputs):
    """Build and compile the 1D CNN classifier shared by all three representations.

    Layers: 2 Convolution Layers, 1 Dropout Layer, 1 Max-Pooling Layer,
    1 Flatten Layer, 2 Dense Layers (the last one a softmax over n_outputs).

    Parameters
    ----------
    n_timesteps : int
        Length of the convolved axis (second axis of the input tensor).
    n_features : int
        Number of channels (third axis of the input tensor).
    n_outputs : int
        Number of classes, i.e. width of the one-hot label arrays.

    Returns
    -------
    A compiled keras Sequential model.
    """
    model = Sequential()
    model.add(Conv1D(filters = 64, kernel_size = 3, activation='relu', input_shape=(n_timesteps,n_features)))
    model.add(Conv1D(filters = 64, kernel_size = 3, activation='relu'))
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


def _fit_and_record(title, X_train, X_test, y_tn_p, y_tt_p, encoder, ytruth, stores, prev_time):
    """Scale, train, evaluate, print and record one representation for one fold.

    Reads the notebook-level names epochs, batch_size, verbose, labels and me.

    Parameters
    ----------
    title : str
        Heading printed above the result row (e.g. 'SCALED').
    X_train, X_test : 2D arrays
        Training / testing data, one row per spectrum, already in the
        representation to be evaluated.
    y_tn_p, y_tt_p : 2D arrays
        One-hot encoded training / testing labels.
    encoder : LabelEncoder
        Fitted encoder used to turn predicted class indices back into labels.
    ytruth : array
        Decoded ground-truth labels of the test rows, handed to me.Scorer.
    stores : sequence of 7 lists
        Receives, in this order: test accuracy, test loss, last-epoch training
        accuracy, last-epoch training loss, last-epoch validation accuracy,
        last-epoch validation loss, me.Scorer result.
    prev_time : float
        time.time() stamp from which the processing time is measured.
    """
    #Scale X-Data with Training Xs
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_tn_s = scaler.transform(X_train)
    X_tt_s = scaler.transform(X_test)

    #Reshape All Data to a 3D Tensor of Shape [Number of Spectra, Number of Features, 1]
    X_tn_p = X_tn_s.reshape(X_tn_s.shape[0], X_tn_s.shape[1], 1)
    X_tt_p = X_tt_s.reshape(X_tt_s.shape[0], X_tt_s.shape[1], 1)

    y_tnT = tf.convert_to_tensor(y_tn_p)
    y_ttT = tf.convert_to_tensor(y_tt_p)
    X_tnT = tf.convert_to_tensor(X_tn_p)
    X_ttT = tf.convert_to_tensor(X_tt_p)

    display("--- Processing Time: %s seconds ---" % (time.time() - prev_time))
    prev_time = time.time()

    #Multi-class Classification with Keras
    model = _build_cnn(X_tn_p.shape[1], X_tn_p.shape[2], y_tn_p.shape[1])

    #Implement EarlyStopping
    stopper = tf.keras.callbacks.EarlyStopping(monitor = "val_loss", patience = 2, verbose = verbose, restore_best_weights = True)

    #Fit Model
    history = model.fit(X_tnT, y_tnT, epochs=epochs, batch_size=batch_size, verbose=verbose, validation_split=0.1, callbacks=[stopper])

    #Evaluate Model
    test_loss, test_accuracy = model.evaluate(X_ttT, y_ttT, batch_size=batch_size, verbose=verbose)

    #Make Prediction
    ypred = model.predict(X_ttT)
    ypred = tf.argmax(input = ypred, axis = 1).numpy()
    ypred = encoder.inverse_transform(ypred)

    #Score THIS model's predictions once, so the printed and the recorded value agree
    regression_error = me.Scorer(ytruth, ypred, labels)

    #NOTE: the history values are those of the last epoch that ran, which is not
    #necessarily the epoch whose weights EarlyStopping restores.
    metrics = (
        test_accuracy,
        test_loss,
        history.history['accuracy'][-1],
        history.history['loss'][-1],
        history.history['val_accuracy'][-1],
        history.history['val_loss'][-1],
        regression_error,
    )

    #Display Results
    print(title)
    print(*RESULT_HEADER)
    print(*metrics)

    for store, value in zip(stores, metrics):
        store.append(value)

    display("--- Training Time: %s seconds ---" % (time.time() - prev_time))


for train_index, test_index in Folds:

    y_tn = df.index[train_index].to_numpy()
    y_tt = df.index[test_index].to_numpy()
    X_tt = df.iloc[test_index, :].to_numpy()
    X_tn = df.iloc[train_index, :].to_numpy()

    #Augment Data to 4000 Spectra
    X_tnAu, y_tnAu = me.AugmentData(X_tn, y_tn, 4000, df.columns.to_numpy(), False)

    prev_time = time.time()

    #Encode y-Data with Training ys (the labels are shared by all three representations)
    encoder = LabelEncoder()
    encoder.fit(y_tnAu)
    y_tn_p = np_utils.to_categorical(encoder.transform(y_tnAu), num_classes = len(labels))
    y_tt_p = np_utils.to_categorical(encoder.transform(y_tt), num_classes = len(labels))

    #Decoded ground-truth labels of the test rows
    ytruth = encoder.inverse_transform(np.argmax(y_tt_p, axis = 1))

    ####
    #Scaled spectra (no transform)
    _fit_and_record('SCALED', X_tnAu, X_tt, y_tn_p, y_tt_p, encoder, ytruth,
                    (SCtnacc, SCtnloss, SCttacc, SCttloss, SCvlacc, SCvlloss, SCregerr), prev_time)
    prev_time = time.time()

    ####
    #Apply Fourier Transform to Training and Testing Data
    X_tnf = np.apply_along_axis(np.fft.fft, axis=1, arr=X_tnAu)
    X_ttf = np.apply_along_axis(np.fft.fft, axis=1, arr=X_tt)

    #Combine Real and Imaginary Part of FT in form [real, imaginary]
    X_tnf = np.append(X_tnf.real, X_tnf.imag, axis = 1).astype('float32')
    X_ttf = np.append(X_ttf.real, X_ttf.imag, axis = 1).astype('float32')

    _fit_and_record('FOURIER', X_tnf, X_ttf, y_tn_p, y_tt_p, encoder, ytruth,
                    (FTtnacc, FTtnloss, FTttacc, FTttloss, FTvlacc, FTvlloss, FTregerr), prev_time)
    prev_time = time.time()

    #####
    #Apply Walsh-Hadamard Transform to Training and Testing Data
    X_tnh = np.apply_along_axis(fwht, axis=1, arr=X_tnAu).astype('float32')
    X_tth = np.apply_along_axis(fwht, axis=1, arr=X_tt).astype('float32')

    _fit_and_record('HADAMARD', X_tnh, X_tth, y_tn_p, y_tt_p, encoder, ytruth,
                    (HTtnacc, HTtnloss, HTttacc, HTttloss, HTvlacc, HTvlloss, HTregerr), prev_time)
    prev_time = time.time()

    print('----------------------------')
    

In [None]:
#Collect Averages: mean over the folds of every metric list, grouped by representation
(SCtnaccavg, SCtnlossavg, SCttaccavg, SCttlossavg,
 SCvlaccavg, SCvllossavg, SCregerravg) = [
    np.mean(fold_values)
    for fold_values in (SCtnacc, SCtnloss, SCttacc, SCttloss, SCvlacc, SCvlloss, SCregerr)
]

(FTtnaccavg, FTtnlossavg, FTttaccavg, FTttlossavg,
 FTvlaccavg, FTvllossavg, FTregerravg) = [
    np.mean(fold_values)
    for fold_values in (FTtnacc, FTtnloss, FTttacc, FTttloss, FTvlacc, FTvlloss, FTregerr)
]

(HTtnaccavg, HTtnlossavg, HTttaccavg, HTttlossavg,
 HTvlaccavg, HTvllossavg, HTregerravg) = [
    np.mean(fold_values)
    for fold_values in (HTtnacc, HTtnloss, HTttacc, HTttloss, HTvlacc, HTvlloss, HTregerr)
]

#Collect Standard Deviations: np.std with its default settings, same grouping
(SCtnaccstd, SCtnlossstd, SCttaccstd, SCttlossstd,
 SCvlaccstd, SCvllossstd, SCregerrstd) = [
    np.std(fold_values)
    for fold_values in (SCtnacc, SCtnloss, SCttacc, SCttloss, SCvlacc, SCvlloss, SCregerr)
]

(FTtnaccstd, FTtnlossstd, FTttaccstd, FTttlossstd,
 FTvlaccstd, FTvllossstd, FTregerrstd) = [
    np.std(fold_values)
    for fold_values in (FTtnacc, FTtnloss, FTttacc, FTttloss, FTvlacc, FTvlloss, FTregerr)
]

(HTtnaccstd, HTtnlossstd, HTttaccstd, HTttlossstd,
 HTvlaccstd, HTvllossstd, HTregerrstd) = [
    np.std(fold_values)
    for fold_values in (HTtnacc, HTtnloss, HTttacc, HTttloss, HTvlacc, HTvlloss, HTregerr)
]

In [None]:
#Print the cross-validation summary: for each representation the per-metric mean
#row followed by the per-metric standard-deviation row, each under the column titles.
summary_header = ('Test Acc', 'Test Loss', 'Train Acc', 'Train Loss', 'Val_Acc', 'Val_Loss', 'Regression Error')

summary_sections = (
    ('SCALED',
     (SCtnaccavg, SCtnlossavg, SCttaccavg, SCttlossavg, SCvlaccavg, SCvllossavg, SCregerravg),
     (SCtnaccstd, SCtnlossstd, SCttaccstd, SCttlossstd, SCvlaccstd, SCvllossstd, SCregerrstd)),
    ('FOURIER',
     (FTtnaccavg, FTtnlossavg, FTttaccavg, FTttlossavg, FTvlaccavg, FTvllossavg, FTregerravg),
     (FTtnaccstd, FTtnlossstd, FTttaccstd, FTttlossstd, FTvlaccstd, FTvllossstd, FTregerrstd)),
    ('HADAMARD',
     (HTtnaccavg, HTtnlossavg, HTttaccavg, HTttlossavg, HTvlaccavg, HTvllossavg, HTregerravg),
     (HTtnaccstd, HTtnlossstd, HTttaccstd, HTttlossstd, HTvlaccstd, HTvllossstd, HTregerrstd)),
)

print('*********************************************************')
for position, (section_title, averages, deviations) in enumerate(summary_sections):
    #A separator line goes between sections only, not after the last one
    if position > 0:
        print('----------------------------')
    print(section_title + ' AVERAGE')
    print(*summary_header)
    print(*averages)
    print(section_title + ' STANDARD DEVIATION')
    print(*summary_header)
    print(*deviations)
