In [3]:
import tensorflow as tf
import keras as kr
from keras import layers
from keras import regularizers
import keras_tuner

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the dataset

In [9]:

# Read the imputed environmental metadata table.
df = pd.read_csv(
    "/home/vmh/vmhdocs/Research/Inria/Anl/EnvDataAutoencoder/Data/Imputed_Metadata_metaGmetaT_by_OceanLayer.csv",
    sep=",",
)

# Environmental variables kept as model features (one column per feature).
selectcols = [
    "Latitude",
    "Longitude",
    "Depth.nominal",
    "Temperature",
    "Oxygen",
    "ChlorophyllA",
    "Carbon.total",
    "Salinity",
    "Gradient.Surface.temp",
    "Fluorescence",
    "CO3",
    "HCO3",
    "Density",
    "PO4",
    "NO3",
    "Si",
    "Alkalinity.total",
    "Ammonium.5m",
    "Lyapunov",
    "NO2",
    "Depth.Min.O2",
    "NO2NO3",
    "Nitracline",
    "Brunt.Vaisala",
    "Iron.5m",
    "Depth.Max.O2",
    "Okubo.Weiss",
]
df_env = df[selectcols]

# Plain NumPy matrix (rows = samples, columns = selected features).
# NOTE(review): the columns are used as-is, without scaling; they appear to
# span very different ranges, so an MSE loss will be dominated by the
# large-valued ones -- consider standardizing before training.
train_data = df_env.to_numpy(copy=True)
print(train_data.shape)

# Number of input features.
inputlen = df_env.shape[1]
print(inputlen)

df_env.head()

(228, 27)
27


Unnamed: 0,Latitude,Longitude,Depth.nominal,Temperature,Oxygen,ChlorophyllA,Carbon.total,Salinity,Gradient.Surface.temp,Fluorescence,...,Ammonium.5m,Lyapunov,NO2,Depth.Min.O2,NO2NO3,Nitracline,Brunt.Vaisala,Iron.5m,Depth.Max.O2,Okubo.Weiss
0,35.759,14.2574,5,21.48665,208.245,0.013344,0.0103,37.89575,1.0513,1.091875,...,0.00912,0.066158,0.01833,221,0.118,193,61,0.001388,60,-0.124391
1,33.9179,32.898,5,20.4425,207.61355,0.067706,0.005,39.42311,0.24369,1.1765,...,0.00327,0.097233,0.00125,151,0.0455,193,59,0.001388,55,-0.309549
2,37.051,1.9378,9,23.8141,217.809833,0.189348,0.00695,37.522,1.0375,1.183333,...,0.015245,0.0,0.0445,167,0.14325,193,45,0.001388,37,-1.3813
3,42.2038,17.715,5,17.211742,221.782083,0.103935,0.005,38.22805,1.6157,1.465,...,0.001431,0.078584,0.009125,115,0.040875,193,3,0.001388,37,-0.354776
4,39.3888,19.3905,5,18.319192,217.809833,0.104797,0.005,38.185333,2.0091,1.465,...,0.002327,0.0,0.00425,174,0.03525,193,41,0.001388,37,0.369749


# Define model and hyperparameters to be tuned

In [15]:
def autoencoder(units, lr, inputlen=27, encoding_dim=3):
    """Build and compile a symmetric, fully connected autoencoder.

    Architecture (each hidden Dense layer uses ReLU and is followed by
    BatchNormalization):

        input(inputlen) -> 2*units -> units -> encoding_dim
                        -> units -> 2*units -> Dense(inputlen, linear)

    Args:
        units: Width of the inner hidden layers; the outer hidden layers
            use ``2 * units``.
        lr: Learning rate passed to the Adam optimizer.
        inputlen: Number of input features (also the size of the
            reconstruction). Defaults to 27, the value previously
            hard-coded in this function.
        encoding_dim: Size of the bottleneck layer. Defaults to 3, the
            value previously hard-coded in this function.

    Returns:
        A compiled ``kr.Model`` mapping an input to its reconstruction,
        with MSE as the loss and ``"mean_squared_error"`` as a metric.
    """
    # Widths of every hidden Dense layer, in order: encoder, bottleneck, decoder.
    hidden_widths = (units * 2, units, encoding_dim, units, units * 2)

    stack = kr.Sequential()
    stack.add(kr.Input(shape=(inputlen,)))
    for width in hidden_widths:
        stack.add(layers.Dense(units=width, activation='relu'))
        stack.add(layers.BatchNormalization())
    # Linear output so reconstructions are not clipped by an activation.
    stack.add(layers.Dense(inputlen, activation='linear'))

    # Wrap the stack as a functional model: input -> reconstruction.
    # The local is deliberately not called `autoencoder`, to avoid
    # shadowing this function's own name.
    net = kr.Model(stack.input, stack.output)
    opt = kr.optimizers.Adam(learning_rate=lr)
    net.compile(optimizer=opt, loss='mse', metrics=["mean_squared_error"])
    return net

def build_model(hp):
    """Hypermodel factory for the tuner.

    Registers two hyperparameters on ``hp`` -- ``"units"`` (integer,
    100 to 1000, log sampling) and ``"lr"`` (float, 1e-4 to 1e-2, log
    sampling) -- and returns the autoencoder built from their values.
    """
    return autoencoder(
        hp.Int("units", min_value=100, max_value=1000, sampling="log"),
        hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log"),
    )

# Smoke test: build the model once from a fresh HyperParameters container
# to check that it constructs and compiles before starting the search.
build_model(keras_tuner.HyperParameters())

<keras.engine.functional.Functional at 0x7f1b29faae00>

# Search for the best model

In [16]:
# Random search over the hyperparameters declared in build_model:
# 10 trials, each executed 3 times, with a fixed seed.
# NOTE(review): the objective is the *training* mean_squared_error -- no
# validation data is passed to search() below -- so the ranking cannot
# penalize overfitting; consider a validation split and a val_* objective.
tuner = keras_tuner.RandomSearch(
    hypermodel=build_model,
    objective="mean_squared_error",
    max_trials=10,
    executions_per_trial=3,
    seed=333,
    # Trial results are written under <directory>/<project_name>;
    # overwrite=True asks the tuner not to reuse results from an earlier run.
    directory="/home/vmh/vmhdocs/Research/Inria/Anl/EnvDataAutoencoder/Code/autoenc001",
    project_name="tuner",
    overwrite=True,
)

tuner.search_space_summary()

# Log every trial to TensorBoard, once per epoch.
tb = kr.callbacks.TensorBoard(
    log_dir="./tb_logs",
    update_freq="epoch",
    write_images=True,
)

# Autoencoder training: the input doubles as the reconstruction target.
tuner.search(train_data, train_data, epochs=100, callbacks=[tb])

Trial 10 Complete [00h 00m 34s]
mean_squared_error: 1226.5561116536458

Best mean_squared_error So Far: 1173.7253824869792
Total elapsed time: 00h 02m 53s
INFO:tensorflow:Oracle triggered exit


# Save the best model

In [17]:
# get_best_models() returns a *list* of models (best first), so take the
# first entry; calling .summary() on the list itself raises AttributeError.
# No .build() call is needed: the model is created with a fixed Input layer.
best_model = tuner.get_best_models(num_models=1)[0]
best_model.summary()

# get_best_hyperparameters() likewise returns a list of HyperParameters.
best_hps = tuner.get_best_hyperparameters(1)[0]
print(best_hps.values)

# Save the model returned by the tuner, which carries the weights found
# during the search. Rebuilding with build_model(best_hps) would instead
# produce a freshly initialized, untrained network.
# NOTE: from here on the name `autoencoder` refers to this model and
# shadows the autoencoder() builder function; re-run the cell that defines
# it before building new models or starting another search.
autoencoder = best_model
autoencoder.save('./autoencoder_envdata.h5')

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 27)]              0         
                                                                 
 dense (Dense)               (None, 514)               14392     
                                                                 
 batch_normalization (BatchN  (None, 514)              2056      
 ormalization)                                                   
                                                                 
 dense_1 (Dense)             (None, 257)               132355    
                                                                 
 batch_normalization_1 (Batc  (None, 257)              1028      
 hNormalization)                                                 
                                                                 
 dense_2 (Dense)             (None, 3)                 774   

: 