In [None]:
# Reload imported modules before each cell executes, so edits to local .py
# files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import tensorflow as tf
import tensorflow.keras as keras
from tensorboard.plugins.hparams import api as hp

In [3]:
def load_data(filepath):
    """Read the image array and its per-image labels from an HDF5 file.

    Args:
        filepath: Path to an HDF5 file containing the datasets 'X',
            'classname' and 'filename'.

    Returns:
        Tuple ``(X, classnames, filenames)``: ``X`` is the full 'X' dataset
        read into memory; ``classnames`` and ``filenames`` are NumPy arrays
        holding the UTF-8 decoded entries of the matching datasets.

    Raises:
        KeyError: If one of the three datasets is missing from the file.
    """
    import h5py
    import numpy as np
    # The context manager closes the file even when a dataset is missing or an
    # entry fails to decode, which a trailing close() call would not.
    with h5py.File(filepath, 'r') as h5f:
        X = h5f['X'][:]
        classnames = [s.decode('utf-8') for s in h5f['classname'][:]]
        filenames = [s.decode('utf-8') for s in h5f['filename'][:]]
    return X, np.array(classnames), np.array(filenames)

def create_train_test_idx(classnames, test_size=50, random_state=1234):
    """Split sample positions into train/test sets, stratified by class.

    For every distinct class, ``test_size`` positions are drawn without
    replacement for the test set; the remaining positions of that class go to
    the training set.

    Args:
        classnames: Sequence of class labels, one per sample.
        test_size: Number of test samples drawn from each class (default 50).
        random_state: Seed handed to ``DataFrame.sample`` for every class, so
            the split is reproducible (default 1234).

    Returns:
        Tuple ``(train_idx, test_idx)`` of integer position arrays. Classes
        appear in order of first occurrence; training positions keep their
        original order within a class, test positions are in sampled order.

    Raises:
        ValueError: If any class has fewer than ``test_size`` samples.
    """
    import numpy as np
    import pandas as pd

    df = pd.DataFrame(data=list(enumerate(classnames)), columns=['index', 'classname'])
    if df.empty:
        # Nothing to split; return two empty index arrays instead of failing.
        return np.array([], dtype=np.int64), np.array([], dtype=np.int64)

    test_parts = []
    train_parts = []
    for classname in df['classname'].unique():
        class_df = df[df['classname'] == classname]
        if len(class_df) < test_size:
            raise ValueError(
                f"class {classname!r} has only {len(class_df)} samples, "
                f"cannot draw {test_size} test samples without replacement"
            )
        test_part = class_df.sample(test_size, replace=False, random_state=random_state)
        train_part = class_df[~class_df['index'].isin(test_part['index'])]
        test_parts.append(test_part)
        train_parts.append(train_part)

    # Concatenate once at the end instead of re-copying the frames on every iteration.
    test_df = pd.concat(test_parts)
    train_df = pd.concat(train_parts)
    return train_df['index'].values, test_df['index'].values
    
# Load the image set and derive the per-class train/test positions.
X, classnames, filenames = load_data('data/RockAI_images_224x224.h5')
train_idx, test_idx = create_train_test_idx(classnames)

num_classes = 2

# Images: select each split, then apply the DenseNet input preprocessing.
X_train = tf.keras.applications.densenet.preprocess_input(X[train_idx])
X_test = tf.keras.applications.densenet.preprocess_input(X[test_idx])

# Labels: 'No_RA' maps to class 0, every other class name to class 1,
# then one-hot encode over num_classes columns.
y_train = tf.keras.utils.to_categorical(
    [0 if classnames[i] == 'No_RA' else 1 for i in train_idx],
    num_classes,
)
y_test = tf.keras.utils.to_categorical(
    [0 if classnames[i] == 'No_RA' else 1 for i in test_idx],
    num_classes,
)

In [4]:
# Augmentation pipeline: random shear, zoom, rotation (up to 30 degrees) and
# horizontal/vertical flips. To disable augmentation, construct
# ImageDataGenerator() with no arguments instead.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    shear_range=0.2,
    zoom_range=0.2,
    rotation_range=30,
    horizontal_flip=True,
    vertical_flip=True)
# datagen.fit(X_train) is intentionally not called: fit() only computes the
# statistics needed by featurewise_center, featurewise_std_normalization or
# zca_whitening, none of which is enabled here, so it would just copy the
# whole training array for nothing.
# NOTE(review): train_test_model() below calls model.fit(X_train, y_train)
# directly and never draws batches from this generator - confirm whether
# augmentation is meant to be applied during training.

In [5]:
# Hyperparameter search space explored by the tuning loop.
HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([64, 128, 256]))
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0.25, 0.5))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam']))

# Keras metric name passed to model.compile(); also declared as an HParams metric.
METRIC = 'accuracy'

# Experiment-level summary: tells the HParams dashboard which hyperparameters
# and metrics exist. A metric is only shown when its tag is declared here and
# matches the tag of the scalar written for each trial.
with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
    hp.hparams_config(
        hparams=[HP_NUM_UNITS, HP_DROPOUT, HP_OPTIMIZER],
        metrics=[
            hp.Metric(METRIC, display_name=METRIC),
            # run() also writes an 'F1-score' scalar per trial; declare it so
            # the dashboard lists it next to accuracy.
            hp.Metric('F1-score', display_name='F1-score'),
        ],
    )


def train_test_model(hparams, num_epochs=30):
    """Train one dense classifier for a hyperparameter setting and score it.

    Uses the module-level ``X_train`` / ``y_train`` arrays for training and
    ``X_test`` / ``y_test`` for evaluation.

    Args:
        hparams: Mapping keyed by ``HP_NUM_UNITS``, ``HP_DROPOUT`` and
            ``HP_OPTIMIZER`` giving the value of each hyperparameter.
        num_epochs: Number of training epochs (default 30).

    Returns:
        Tuple ``(C, acc, f1, train_time, test_time)``: confusion matrix,
        accuracy and F1 score on the test split, followed by the elapsed
        seconds for ``model.fit`` and for ``model.predict``.
    """
    from sklearn.metrics import confusion_matrix, accuracy_score, f1_score
    import time
    import numpy as np

    model = tf.keras.models.Sequential([
        tf.keras.layers.InputLayer(input_shape=[224, 224, 3]),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(hparams[HP_NUM_UNITS], activation=tf.nn.relu),
        tf.keras.layers.Dropout(hparams[HP_DROPOUT]),
        tf.keras.layers.Dense(2, activation=tf.nn.softmax)
    ])
    model.compile(
        loss='categorical_crossentropy',
        optimizer=hparams[HP_OPTIMIZER],
        metrics=[METRIC],
    )

    # perf_counter() is monotonic, so the measured durations cannot be skewed
    # by wall-clock adjustments during a long training run.
    start_time = time.perf_counter()
    model.fit(X_train, y_train, epochs=num_epochs, verbose=1)
    train_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    y_pred = model.predict(X_test)
    test_time = time.perf_counter() - start_time

    # Collapse softmax outputs and one-hot targets to class indices.
    y_pred = np.argmax(y_pred, axis=1)
    y_true = np.argmax(y_test, axis=1)

    C = confusion_matrix(y_true, y_pred)
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)

    return C, acc, f1, train_time, test_time


def run(run_dir, hparams, log_result_filepath, num_epochs=30):
    """Execute one tuning trial and record its results.

    Writes the hyperparameter values and the resulting accuracy / F1 scalars
    as TensorBoard summaries under ``run_dir``, then appends a plain-text
    record of the trial to ``log_result_filepath``.

    Args:
        run_dir: Directory for this trial's TensorBoard summaries.
        hparams: Mapping keyed by ``HP_NUM_UNITS``, ``HP_DROPOUT`` and
            ``HP_OPTIMIZER``.
        log_result_filepath: Text file the trial record is appended to.
        num_epochs: Number of training epochs, forwarded to
            ``train_test_model`` and written to the text record (default 30).
    """
    import datetime

    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        C, acc, f1, train_time, test_time = train_test_model(hparams, num_epochs=num_epochs)
        # The scalar tag has to equal the metric tag declared through
        # hp.hparams_config (METRIC); a differently-cased tag such as
        # 'Accuracy' is not matched by the HParams dashboard.
        tf.summary.scalar(METRIC, acc, step=1)
        tf.summary.scalar('F1-score', f1, step=1)

    with open(log_result_filepath, 'a') as fp:
        fp.write(f"timestamp: {datetime.datetime.now()}, ")
        for h in hparams:
            fp.write(f"{h.name}: {hparams[h]}, ")
        # NOTE(review): the record says "data_augmentation: standard", but
        # train_test_model() fits on X_train directly without the augmentation
        # generator - confirm which is intended.
        fp.write(f"data_augmentation: standard, epochs: {num_epochs} \n")
        fp.write(f"\t{str(C)}\n")
        fp.write(f"\tAccuracy: {acc:0.4}, F1-score: {f1:0.4}\n")
        fp.write(f"\tTrain time: {train_time:0.4}, Test_time: {test_time:0.4}\n")

In [6]:
# Enumerate the search grid up front: every unit count and optimizer, with
# dropout probed only at the two ends of its interval.
trial_grid = [
    {
        HP_NUM_UNITS: num_units,
        HP_DROPOUT: dropout_rate,
        HP_OPTIMIZER: optimizer,
    }
    for num_units in HP_NUM_UNITS.domain.values
    for dropout_rate in (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value)
    for optimizer in HP_OPTIMIZER.domain.values
]

# Each grid point is trained three times; session_num numbers the runs
# consecutively across the whole sweep.
session_num = 0
for hparams in trial_grid:
    for _ in range(3):
        run_name = "run-%d" % session_num
        print('--- Starting trial: %s' % run_name)
        print({h.name: hparams[h] for h in hparams})
        run('logs/hparam_tuning/' + run_name, hparams, 'logs/hparam_tuning_score.txt')
        session_num += 1

--- Starting trial: run-0
{'num_units': 64, 'dropout': 0.25, 'optimizer': 'adam'}
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
--- Starting trial: run-1
{'num_units': 64, 'dropout': 0.25, 'optimizer': 'adam'}
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
--- Starting trial: run-2
{'num_units': 64, 'dropout': 0.25, 'optimizer': 'adam'}
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/