Experimenting with data scaling in the notebook CGM_Scaling.ipynb showed that standard scaling works best. In this notebook I try to optimize the network architecture and work out why standard scaling performs best. <br> 
Importing packages

In [1]:
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np
import h5py
import gdown
from collections import defaultdict
import matplotlib.pyplot as plt
####
from tensorflow import keras
from keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras import models 
from tensorflow.keras import layers 
from tensorflow.keras import initializers
from tensorflow.keras.models import load_model
#
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from keras.utils.np_utils import to_categorical
from sklearn.preprocessing import MinMaxScaler

# 
from collections import defaultdict

Loading data

In [2]:
# Read each split's 'spectra' dataset into an in-memory NumPy array.
# np.array(...) copies the data out of the file, so each HDF5 handle is closed
# as soon as its `with` block ends instead of staying open for the whole
# kernel session.
with h5py.File('train_set.hdf5', 'r') as data_train:
    x_train = np.array(data_train['spectra'])

with h5py.File('val_set.hdf5', 'r') as data_val:
    x_val = np.array(data_val['spectra'])

with h5py.File('test_set.hdf5', 'r') as data_test:
    x_test = np.array(data_test['spectra'])

Functions

In [3]:
def reconstr_loss(original_spectra, reconstructed_spectra, latent_dim):
    '''Reconstruction MSE plus a penalty that grows with the latent-space size.

    Inputs:
    - original_spectra (np.array): original spectra.
    - reconstructed_spectra (np.array): reconstruction of the original spectra from the latent representation.
    - latent_dim (integer): size of the latent space.

    Returns:
    - mean squared error between the two arrays, plus `penalty` for every
      latent dimension, plus a one-off `penalty2` when latent_dim is larger
      than 6.
    '''

    penalty = 0.00003        # added once per latent dimension
    penalty2 = 5*0.00003     # extra flat cost applied only when latent_dim > 6

    # The `squared` keyword is intentionally not passed: plain (non-rooted) MSE
    # is the default, and the keyword was deprecated in scikit-learn 1.4 and
    # removed in 1.6, where passing it raises a TypeError.
    mse_loss = mean_squared_error(original_spectra, reconstructed_spectra)

    # (latent_dim > 6) is a bool, so it multiplies penalty2 by 0 or 1.
    loss_penalized = mse_loss + latent_dim*penalty + penalty2*(latent_dim > 6)

    return loss_penalized


# Size of x_train's second axis; make_encoder reads this module-level name
# for the input_shape of its first Dense layer.
input_dim = x_train.shape[1]
def make_encoder(hidden_nodes_list, activation_functions_list, n_inputs=None):
    '''Build the encoder as a Sequential stack of Dense layers.

    Inputs:
    - hidden_nodes_list (list of int): number of units of each Dense layer, in order; the last entry is the latent-space size.
    - activation_functions_list (list): activation of each layer, same length as hidden_nodes_list.
    - n_inputs (integer, optional): number of input features of the first layer.
      When omitted, the module-level `input_dim` is used.

    Returns:
    - encoder (Sequential model) and latent_dim (the last entry of hidden_nodes_list).

    Raises:
    - ValueError if the two lists differ in length, or if they are empty.
    '''
    if len(hidden_nodes_list) != len(activation_functions_list):
        raise ValueError("length of hidden nodes list should be equal length of activation_functions_list")
    if len(hidden_nodes_list) == 0:
        # Without this guard an empty list ends in an UnboundLocalError on
        # latent_dim at the return statement.
        raise ValueError("hidden_nodes_list must contain at least one layer")

    if n_inputs is None:
        n_inputs = input_dim  # fall back to the notebook-level global

    # Loop-invariant, so computed once up front.
    latent_dim = hidden_nodes_list[-1]

    encoder = models.Sequential()
    for i, (n_nodes, act) in enumerate(zip(hidden_nodes_list, activation_functions_list)):
        if i == 0:
            # Only the first layer declares the input shape.
            encoder.add(layers.Dense(n_nodes, activation=act, input_shape=(n_inputs,)))
        else:
            encoder.add(layers.Dense(n_nodes, activation=act))

    return encoder, latent_dim

def make_decoder(hidden_nodes_list, activation_functions_list, latent_dim):
    '''Build the decoder as a Sequential stack of Dense layers.

    Inputs:
    - hidden_nodes_list (list of int): number of units of each Dense layer, in order.
    - activation_functions_list (list): activation of each layer, same length as hidden_nodes_list.
    - latent_dim (integer): size of the latent vector fed to the first layer.

    Returns:
    - decoder (Sequential model).

    Raises:
    - ValueError if the two lists differ in length.
    '''
    if len(activation_functions_list) != len(hidden_nodes_list):
        raise ValueError("length of hidden nodes list should be equal length of activation_functions_list")

    decoder = models.Sequential()
    layer_specs = zip(hidden_nodes_list, activation_functions_list)
    for position, (width, activation_name) in enumerate(layer_specs):
        # Only the first layer is told the shape of its input.
        first_layer_kwargs = {'input_shape': (latent_dim,)} if position == 0 else {}
        decoder.add(layers.Dense(width, activation=activation_name, **first_layer_kwargs))

    return decoder

In [4]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training spectra only, then push every split through
# that same fitted transform so validation/test data never influence it.
Scaler = StandardScaler().fit(x_train)

x_train_sc, x_val_sc, x_test_sc = (
    Scaler.transform(split) for split in (x_train, x_val, x_test)
)

In [5]:
# Print the value range of every split before and after scaling.
for label, raw, scaled in (
    ('x_train', x_train, x_train_sc),
    ('x_val', x_val, x_val_sc),
    ('x_test', x_test, x_test_sc),
):
    print(f' max {label} = {np.max(raw)},  max {label}_sc = {np.max(scaled)}')
    print(f' min {label} = {np.min(raw)},  min {label}_sc = {np.min(scaled)}')
    print()



 max x_train = 1.1827440440829455,  max x_train_sc = 5.443224686477889
 min x_train = 0.0,  min x_train_sc = -29.442531091994336

 max x_val = 1.1680716392704869,  max x_val_sc = 5.26183861330238
 min x_val = 0.0,  min x_val_sc = -28.50838955195364

 max x_test = 1.1755607741167227,  max x_test_sc = 5.444434349771324
 min x_test = 0.0,  min x_test_sc = -27.33959116555318



In [None]:
## plot the first ten training spectra, one per panel
# NOTE(review): the original header said "plot the scaled version", but only
# the negated, unscaled spectra are drawn - confirm the sign flip is intended.
nr, nc = 2, 5
fig, axs = plt.subplots(nr, nc, figsize = (20,10))
# axs.flat walks the 2x5 grid row by row, matching the manual row/column counters.
for i, ax in enumerate(axs.flat):
    ax.plot(-x_train[i,:], label = 'unscaled', alpha = .5)
#     ax.plot(x_train_sc[i,:], label = 'scaled',  alpha = .5)
    ax.legend()

In [6]:
# Sweep the activation of the encoder's middle layer: train one autoencoder per
# candidate on the standard-scaled spectra and score it on the test set.

input_dim = x_train.shape[1]


hidden_nodes = [264,128,6]               # encoder widths; last entry is the latent size
hidden_nodes_dec = [128,264,input_dim]   # decoder widths; last entry matches the input width

activation_e1= [ 'linear','relu','linear']#,'tanh','tanh','tanh']

# NOTE(review): the decoder's last layer is 'relu', which cannot output
# negative values, while the standard-scaled targets do go negative (see the
# min values printed above). Consider 'linear' for the final decoder layer.
activation_d1= [ 'relu','relu','relu']#,'tanh','tanh','tanh']

earlystopping = EarlyStopping(monitor = 'val_loss', patience = 10)
epochs = 100 
batch_size = 128

enc_act = ['sigmoid', 'tanh', 'linear','relu']

# Penalised test loss per activation, so the comparison survives the loop
# instead of only the last value being left in `rl`.
results_dict = {}

for act in enc_act:
    print(f'training for activation {act}')
    # Swap the middle activation on a copy so the template list is not mutated.
    encoder_activations = list(activation_e1)
    encoder_activations[1] = act
    encoder,ld = make_encoder(hidden_nodes, encoder_activations)
    decoder = make_decoder(hidden_nodes_dec,activation_d1, latent_dim = ld)
    network=models.Sequential()
    network.add(encoder)
    network.add(decoder)
    network.compile(optimizer='adam', loss='mse', metrics=['mse'])

    ### train the network (input is also the target: autoencoder)
    history = network.fit(x_train_sc,x_train_sc,
                  epochs=epochs,
                  batch_size=batch_size,
                  callbacks=[earlystopping], # Early stopping
                  validation_data=(x_val_sc,x_val_sc))
#     network.save(filepath)

    ## check reconstruction loss
    test_set_latent_encoding = encoder(x_test_sc)
    test_set_reconstructions = decoder(test_set_latent_encoding)
    # Undo the scaling so the loss is measured against the raw spectra.
    x_test_reconstructed = Scaler.inverse_transform(test_set_reconstructions)
    rl = reconstr_loss(x_test, x_test_reconstructed, latent_dim = test_set_latent_encoding.shape[1])
    results_dict[act] = rl
    print(rl)

training for activation sigmoid


2021-11-28 09:34:59.983762: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-11-28 09:34:59.983798: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2021-11-28 09:34:59.983824: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (c0704a-s11.ufhpc): /proc/driver/nvidia/version does not exist
2021-11-28 09:34:59.985222: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-11-28 09:35:01.648988: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:1

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
0.005968016103785186
training for activation tanh
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
0.00597147476905179
training for activation linear
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epo

In [None]:

# `rl` is reassigned on every pass of the activation loop, so this prints the
# penalised loss of the last activation tried only.
print(rl)

In [None]:
# Overlay the first ten test spectra with their reconstructions, one panel each.
# x_test_reconstructed and rl come from the last model trained in the loop.
nr = 2; nc = 5
fig, axs = plt.subplots(nr,nc, figsize = (20,10))
# axs.flat walks the grid row by row, replacing manual row/column counters.
for i, ax in enumerate(axs.flat):
    ax.plot(x_test[i,:], label = 'data', alpha = .5)
    ax.plot(x_test_reconstructed[i,:], label = 'recon',  alpha = .5)
    ax.legend()  # without this the 'data' / 'recon' labels are never shown
plt.tight_layout()
plt.show()
print(rl)
print()

In [None]:
# Cross-check of the reconstruction error: run the full autoencoder on the
# scaled test set, undo the scaling, and compare against the raw spectra.
# `network` is whichever model the activation loop built last.
predictions = network.predict(x_test_sc)
retransform = Scaler.inverse_transform(predictions)
# `squared=True` is not passed: plain MSE is the default, and the keyword was
# deprecated in scikit-learn 1.4 and removed in 1.6.
mse_loss = mean_squared_error(x_test, retransform)

In [None]:
# Display the unpenalised test-set MSE computed from network.predict above.
mse_loss