##### Copyright 2019 The TensorFlow Authors.

In [None]:
import tensorflow as tf
from tensorboard.plugins.hparams import api as hp
import datetime
from sklearn.model_selection import train_test_split
import time
from sklearn.preprocessing import StandardScaler
import numpy as np # for math and arrays
import pandas as pd
from tqdm import tqdm

#import tensorflow.keras as keras
import tensorflow as tf
import datetime
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import time
from sklearn.preprocessing import StandardScaler
import numpy as np # for math and arrays
import pandas as pd


In [None]:


# Load the Titanic training CSV and separate the label column from the features.
titanic = pd.read_csv("https://storage.googleapis.com/tf-datasets/titanic/train.csv")

titanic_features = titanic.copy()
titanic_labels = titanic_features.pop('survived')

# One symbolic Keras input per feature column: object-dtype columns become
# tf.string inputs, every other column is fed as tf.float32.
inputs = {}
for name, column in titanic_features.items():
  dtype = tf.string if column.dtype == object else tf.float32
  inputs[name] = tf.keras.Input(shape=(1,), name=name, dtype=dtype)

# `keras_input` is used instead of `input` so the builtin `input()` is not
# clobbered at module scope for the rest of the notebook.
numeric_inputs = {name: keras_input for name, keras_input in inputs.items()
                  if keras_input.dtype == tf.float32}

# Numeric branch: concatenate all float inputs and normalize them together.
x = tf.keras.layers.Concatenate()(list(numeric_inputs.values()))
norm = tf.keras.layers.Normalization()
# NOTE(review): the normalization statistics are adapted on every row of the
# CSV, including the rows later used as validation/test data.
norm.adapt(np.array(titanic[list(numeric_inputs)]))
all_numeric_inputs = norm(x)
preprocessed_inputs = [all_numeric_inputs]

# String branch: map each string column to integer indices (vocabulary taken
# from the column's unique values) and pass them through CategoryEncoding.
for name, keras_input in inputs.items():
  if keras_input.dtype == tf.float32:
    continue
  lookup = tf.keras.layers.StringLookup(vocabulary=np.unique(titanic_features[name]))
  one_hot = tf.keras.layers.CategoryEncoding(num_tokens=lookup.vocabulary_size())

  x = lookup(keras_input)
  x = one_hot(x)
  preprocessed_inputs.append(x)

preprocessed_inputs_cat = tf.keras.layers.Concatenate()(preprocessed_inputs)

# Model that maps the dict of raw feature columns to one concatenated tensor.
titanic_preprocessing = tf.keras.Model(inputs, preprocessed_inputs_cat)

titanic_features_dict = {name: np.array(value)
                         for name, value in titanic_features.items()}

# Smoke test: run the preprocessing model on the first row only.
features_dict = {name: values[:1] for name, values in titanic_features_dict.items()}
titanic_preprocessing(features_dict)

# Sequential (unshuffled) split: rows [0, 500) train, [500, 563) validation,
# [563, end) test. The arrays are sliced directly instead of rebuilding a
# DataFrame for every column and every split.
TRAIN_END = 500
VALID_END = 563
x_train = {name: np.array(values[:TRAIN_END])
           for name, values in titanic_features_dict.items()}
x_valid = {name: np.array(values[TRAIN_END:VALID_END])
           for name, values in titanic_features_dict.items()}
x_test = {name: np.array(values[VALID_END:])
          for name, values in titanic_features_dict.items()}
y_train = titanic_labels[:TRAIN_END]
y_valid = titanic_labels[TRAIN_END:VALID_END]
y_test = titanic_labels[VALID_END:]


In [None]:

# Hyperparameter search space for the grid search. Every HParam is a discrete
# set; the full grid is the cartesian product of all of them.
# For a quick smoke test, shrink each list to a single value, e.g.
# num_units=[128], num_layers=[1], lr=[0.001], batch_size=[1, 64].
# Dropout is currently not part of the search space.
HP_NUM_UNITS = hp.HParam(
    'num_units', hp.Discrete([32, 64, 128, 256, 1000, 2000]))
HP_NUM_LAYERS = hp.HParam(
    'num_layers', hp.Discrete([1, 2, 3, 4]))
HP_OPTIMIZER = hp.HParam(
    'optimizer', hp.Discrete(['adam']))
HP_LR = hp.HParam(
    'lr', hp.Discrete([0.1, 0.01, 0.001, 0.0001]))
HP_BATCH_SIZE = hp.HParam(
    'batch_size', hp.Discrete([1, 16, 32, 64, 128, 256]))

# Name under which the per-trial accuracy is reported to the HParams plugin.
METRIC_ACCURACY = 'accuracy'

# Timestamped log root so that repeated executions do not overwrite each other.
run_timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_name = f"logs_{run_timestamp}/hparam_tuning"

# Write the experiment-level description (hyperparameters and metrics) once.
with tf.summary.create_file_writer(log_name).as_default():
  hp.hparams_config(
      hparams=[
          HP_NUM_UNITS,
          HP_NUM_LAYERS,
          HP_OPTIMIZER,
          HP_LR,
          HP_BATCH_SIZE,
      ],
      metrics=[
          hp.Metric(METRIC_ACCURACY, display_name='Accuracy'),
      ],
  )

def helper_fct_return_optimizer_w_learn_rate(opt_name:str,lr:float):
  """Return a Keras optimizer selected by name, configured with a learning rate.

  Args:
    opt_name: Optimizer identifier; "sgd" and "adam" are supported.
    lr: Learning rate passed to the optimizer constructor.

  Returns:
    A `tf.keras.optimizers.SGD` or `tf.keras.optimizers.Adam` instance.

  Raises:
    ValueError: If `opt_name` is not one of the supported identifiers.
  """
  if opt_name == "sgd":
    return tf.keras.optimizers.SGD(learning_rate=lr)
  if opt_name == "adam":
    return tf.keras.optimizers.Adam(learning_rate=lr)
  # Fail loudly here instead of handing a sentinel string to `model.compile`,
  # where the resulting error would no longer mention the offending name.
  raise ValueError(
      f"Unsupported optimizer name {opt_name!r}; expected 'sgd' or 'adam'.")

def train_test_model(hparams,run_dir,preprocessing_head, inputs):
  """Train one model for a hyperparameter combination and return its accuracy.

  The model is `preprocessing_head` followed by `HP_NUM_LAYERS` dense hidden
  layers of `HP_NUM_UNITS` units each and a single-unit output layer. It is
  trained for 100 epochs on the module-level `x_train`/`y_train`, validated on
  the module-level `x_valid`/`y_valid`, and logged to TensorBoard.

  Args:
    hparams: Mapping from the HP_* HParam objects to the values for this trial.
    run_dir: Path prefix for this trial; a suffix describing the
      hyperparameters is appended to form the TensorBoard log directory.
    preprocessing_head: Keras model applied to `inputs` before the dense body.
    inputs: Dict of symbolic Keras inputs consumed by `preprocessing_head`.

  Returns:
    The accuracy reported by `model.evaluate` on `x_valid`/`y_valid`.
  """
  body = tf.keras.models.Sequential()
  for _ in range(int(hparams[HP_NUM_LAYERS])):
    # A non-linearity is required here: without it the stacked Dense layers
    # collapse into a single linear map and depth/width have no effect.
    body.add(tf.keras.layers.Dense(hparams[HP_NUM_UNITS], activation='relu'))
  # Sigmoid output so the model emits probabilities, which is what the
  # string-configured 'BinaryCrossentropy' loss (from_logits=False) and the
  # 0.5-thresholded 'accuracy' metric expect.
  body.add(tf.keras.layers.Dense(1, activation='sigmoid'))

  preprocessed_inputs = preprocessing_head(inputs)
  result = body(preprocessed_inputs)
  model = tf.keras.Model(inputs, result)
  model.compile(
      optimizer=helper_fct_return_optimizer_w_learn_rate(
          hparams[HP_OPTIMIZER], hparams[HP_LR]),
      loss='BinaryCrossentropy',
      metrics=['accuracy'],
  )

  # Same directory name as before: <run_dir>_<L>layers_<U>nodes_<opt><lr>_<bs>
  tensorboard_dir = (
      f"{run_dir}_{hparams[HP_NUM_LAYERS]}layers_"
      f"{hparams[HP_NUM_UNITS]}nodes_"
      f"{hparams[HP_OPTIMIZER]}{hparams[HP_LR]}_{hparams[HP_BATCH_SIZE]}"
  )
  model.fit(
      x_train, y_train,
      validation_data=(x_valid, y_valid),
      epochs=100,
      shuffle=True,
      verbose=False,
      batch_size=hparams[HP_BATCH_SIZE],
      callbacks=[
          tf.keras.callbacks.TensorBoard(log_dir=tensorboard_dir, histogram_freq=1),
      ],
  )
  _, accuracy = model.evaluate(x_valid, y_valid, verbose=False)
  return accuracy

def run(run_dir, hparams, preprocessing_head, inputs):
  """Execute one trial and log its hyperparameters and accuracy.

  Args:
    run_dir: Directory the summary writer for this trial writes to; also
      forwarded to `train_test_model`.
    hparams: Mapping from HParam objects to the values used in this trial.
    preprocessing_head: Forwarded unchanged to `train_test_model`.
    inputs: Forwarded unchanged to `train_test_model`.
  """
  with tf.summary.create_file_writer(run_dir).as_default():
    # Record the hyperparameter values first so the trial shows up in the
    # HParams dashboard together with its metric.
    hp.hparams(hparams)
    trial_accuracy = train_test_model(
        hparams,
        run_dir,
        preprocessing_head,
        inputs,
    )
    tf.summary.scalar(METRIC_ACCURACY, trial_accuracy, step=1)

import itertools

# Exhaustive grid search: one trial per combination of hyperparameter values.
# The progress bar advances once per `num_units` value; the remaining
# dimensions are enumerated with itertools.product in the same order as the
# original nested loops.
session_num = 0
for num_units in tqdm(HP_NUM_UNITS.domain.values):
  for num_layers, optimizer, lr, batch_size in itertools.product(
      HP_NUM_LAYERS.domain.values,
      HP_OPTIMIZER.domain.values,
      HP_LR.domain.values,
      HP_BATCH_SIZE.domain.values,
  ):
    hparams = {
        HP_NUM_UNITS: num_units,
        HP_NUM_LAYERS: num_layers,
        HP_OPTIMIZER: optimizer,
        HP_LR: lr,
        HP_BATCH_SIZE: batch_size,
    }
    run_name = "run-%d" % session_num
    # Each trial gets its own sub-directory of `log_name`, next to the
    # experiment config written there; the separator was previously missing,
    # which produced sibling directories named "hparam_tuningrun-N".
    run(f"{log_name}/{run_name}", hparams, titanic_preprocessing, inputs)
    session_num += 1
