In [1]:
from tensorflow import keras
from tensorflow.keras import layers
import splitfolders
import tensorflow as tf
from tensorflow.keras.utils import image_dataset_from_directory
import matplotlib.pyplot as plt
import keras_tuner as kt
import scipy
import warnings
import os

#### Image Parameters

In [2]:
# Number of images per batch produced by the dataset loaders.
batch_size = 32
# Inputs are square, so height and width share one value.
img_height = img_width = 256
# Number of colour channels per image.
img_channels = 3

#### Data

In [3]:
# Root folder that contains the 'train', 'val' and 'test' sub-directories.
train_split_test_dir = '../0. Project Data/Reshaped zero-padded and split data'


def _load_split(split_name, shuffle=True):
    """Load one split directory as a batched image dataset.

    Args:
        split_name: Sub-directory of ``train_split_test_dir`` to read
            ('train', 'val' or 'test').
        shuffle: Whether the loader shuffles the files.

    Returns:
        The dataset returned by ``image_dataset_from_directory``.
    """
    return image_dataset_from_directory(
        f'{train_split_test_dir}/{split_name}',
        # image_size expects (height, width); the order matters as soon as
        # the two values differ.
        image_size=(img_height, img_width),
        batch_size=batch_size,
        shuffle=shuffle)


train_ds = _load_split('train')
val_ds = _load_split('val')
# Keep the test split in a fixed order so that predict() rows can be matched
# back to files/labels; evaluate() metrics do not depend on the order.
test_ds = _load_split('test', shuffle=False)

Found 2678 files belonging to 4 classes.
Found 667 files belonging to 4 classes.
Found 843 files belonging to 4 classes.


In [4]:
def build_model(hp):
    """Build and compile the VGG16 transfer-learning classifier for one trial.

    The model applies random augmentation, VGG16 preprocessing, a frozen
    ImageNet-pretrained VGG16 convolutional base, then a small dense head
    ending in a 4-way softmax.

    Args:
        hp: KerasTuner hyperparameter container. Two values are drawn from it:
            'units' (int, 128 to 512 in steps of 32) and 'dropout'
            (float, 0.1 to 0.5 in steps of 0.2).

    Returns:
        A compiled ``keras.Model`` using the Adam optimizer, sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    # Pretrained feature extractor without its classifier head; frozen so only
    # the new dense head is trained.
    conv_base = keras.applications.vgg16.VGG16(
        include_top=False,
        weights='imagenet')
    conv_base.trainable = False

    augmentation_layers = [
        layers.RandomFlip('horizontal'),
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.2),
    ]
    data_augmentation = keras.Sequential(augmentation_layers)

    # Tunable hyperparameters for the dense head.
    hp_units = hp.Int('units', min_value=128, max_value=512, step=32)
    hp_dropout = hp.Float('dropout', min_value=.1, max_value=.5, step=.2)

    inputs = keras.Input(shape=(img_height, img_width, img_channels))
    augmented = data_augmentation(inputs)
    preprocessed = keras.applications.vgg16.preprocess_input(augmented)
    features = conv_base(preprocessed)
    flattened = layers.Flatten()(features)
    # NOTE(review): this Dense layer has no activation, so it is a linear
    # projection -- confirm that is intended.
    hidden = layers.Dense(hp_units)(flattened)
    regularized = layers.Dropout(hp_dropout)(hidden)
    outputs = layers.Dense(4, activation='softmax')(regularized)

    model = keras.Model(inputs, outputs)
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'])
    return model

In [5]:
# Bayesian-optimisation search over the hyperparameters declared in
# build_model, scored on validation accuracy.
tuner_settings = dict(
    objective='val_accuracy',
    max_trials=100,
    executions_per_trial=1,
    directory='kt_logs',
    overwrite=False,  # if changed tuner, set to True
)
tuner = kt.BayesianOptimization(build_model, **tuner_settings)

INFO:tensorflow:Reloading Tuner from kt_logs\untitled_project\tuner0.json


In [6]:
# Print the hyperparameters the tuner will search over, with their full settings.
tuner.search_space_summary(extended=True)

Search space summary
Default search space size: 2
units (Int)
{'default': None, 'conditions': [], 'min_value': 128, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
dropout (Float)
{'default': 0.1, 'conditions': [], 'min_value': 0.1, 'max_value': 0.5, 'step': 0.2, 'sampling': 'linear'}


In [7]:
# Early stopping on validation loss with a patience of 7 epochs; used for
# every trial of the hyperparameter search.
early_stopping = keras.callbacks.EarlyStopping(monitor="val_loss", patience=7)
callbacks = [early_stopping]

Because the previous transfer-learning model started overfitting very early, we train each trial for only 20 epochs.

In [8]:
# Raise the TensorFlow logger threshold to ERROR to cut log noise during the search.
tf.get_logger().setLevel('ERROR')

In [9]:
# Run the hyperparameter search: each trial trains for at most 20 epochs and
# is scored on the validation set.
# batch_size is deliberately not passed: train_ds is a tf.data.Dataset that is
# already batched when it is created, and Keras documents that batch_size
# must not be specified for dataset inputs.
tuner.search(train_ds,
             epochs=20,
             validation_data=val_ds,
             callbacks=callbacks,
             verbose=2,
)

In [10]:
# Request the hyperparameter sets of the 5 best trials from the tuner.
best_hps=tuner.get_best_hyperparameters(num_trials=5)

In [11]:
# Report the first entry of best_hps.
top_hp = best_hps[0]
print(f'''
The hyperparameter search is complete. The optimal number of units in the first densely-connected layer is\n
{top_hp.get('units')}\n
and the optimal dropout rate for the dropout layer is \n
{top_hp.get('dropout')}.
''')


The hyperparameter search is complete. The optimal number of units in the first densely-connected layer is

128

and the optimal dropout rate for the dropout layer is 

0.5.



In [12]:
# Print the hyperparameters and objective score of the 5 best trials.
tuner.results_summary(num_trials=5)

Results summary
Results in kt_logs\untitled_project
Showing 5 best trials
Objective(name="val_accuracy", direction="max")

Trial 063 summary
Hyperparameters:
units: 128
dropout: 0.5
Score: 0.9640179872512817

Trial 048 summary
Hyperparameters:
units: 160
dropout: 0.5
Score: 0.9595202207565308

Trial 075 summary
Hyperparameters:
units: 288
dropout: 0.30000000000000004
Score: 0.9580209851264954

Trial 086 summary
Hyperparameters:
units: 128
dropout: 0.1
Score: 0.95652174949646

Trial 064 summary
Hyperparameters:
units: 352
dropout: 0.30000000000000004
Score: 0.9550225138664246


In [13]:
def get_best_epoch(hp):
    """Train a fresh model and return the epoch with the lowest validation loss.

    A new model is built from ``hp`` and trained on ``train_ds`` for up to 100
    epochs, with early stopping on ``val_loss`` (patience 10).

    Args:
        hp: Hyperparameter set passed through to ``build_model``.

    Returns:
        The 1-based index of the epoch with the minimum validation loss.
    """
    model = build_model(hp)
    # Named separately so it does not shadow the notebook-level `callbacks`.
    early_stopping = keras.callbacks.EarlyStopping(
        monitor='val_loss', mode='min', patience=10)
    # batch_size is not passed: the datasets are already batched, and Keras
    # documents that batch_size must not be specified for dataset inputs.
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=100,
        callbacks=[early_stopping])
    val_loss_per_epoch = history.history['val_loss']
    # list.index is 0-based while epochs are counted from 1.
    best_epoch = val_loss_per_epoch.index(min(val_loss_per_epoch)) + 1
    print(f"Best epoch: {best_epoch}")
    return best_epoch

In [14]:
# Training batches followed by validation batches, then shuffled through a
# 100-element buffer; used for the final retraining.
combined_train_and_val_ds = train_ds.concatenate(val_ds).shuffle(buffer_size=100)

In [15]:
def get_best_trained_model(hp):
    """Retrain a model with ``hp`` on the combined train + validation data.

    The epoch count comes from ``get_best_epoch`` and is scaled by 1.25
    (truncated to an int) because the combined dataset is larger than the
    training set alone.

    Args:
        hp: Hyperparameter set passed through to ``build_model``.

    Returns:
        The trained ``keras.Model``.
    """
    best_epoch = get_best_epoch(hp)
    model = build_model(hp)
    # batch_size is not passed: the dataset is already batched, and Keras
    # documents that batch_size must not be specified for dataset inputs.
    model.fit(
        combined_train_and_val_ds,
        epochs=int(best_epoch * 1.25))  # train for more epochs because there is more data
    return model

In [16]:
# Retrain one model per entry in best_hps, evaluate each on the test set, and
# collect them in best_models in the same order as best_hps.
best_models = []
for hp in best_hps:
    model = get_best_trained_model(hp)
    # The metrics returned by evaluate() are not stored.
    model.evaluate(test_ds)
    best_models.append(model)
# After the loop, `model` stays bound to the last model trained; the export
# cell at the end of the notebook reads that name.

Epoch 1/100
Epoch 2/100

KeyboardInterrupt: 

In [None]:
# Display the list of retrained models.
best_models

In [None]:
# Predictions of the first retrained model on the test set.
best_models[0].predict(test_ds)

In [None]:
# Create the output folder once, up front; exist_ok=True replaces the
# check-then-create sequence that was repeated on every iteration.
os.makedirs('./saved_models', exist_ok=True)
# Save each model as my_model1, my_model2, ... following best_models order.
for index, hp_mod in enumerate(best_models):
    hp_mod.save(f'./saved_models/my_model{index + 1}')

In [None]:
# Reload the first saved model from disk.
new_model = tf.keras.models.load_model('saved_models/my_model1')

In [None]:
# Print the architecture of the reloaded model.
new_model.summary()

In [None]:
# Predictions of the reloaded model on the test set.
new_model.predict(test_ds)

In [None]:
# Evaluate the reloaded model on the test set.
new_model.evaluate(test_ds)

In [None]:
# Export the architecture as JSON and the weights as HDF5.
# NOTE(review): in the visible notebook, `model` is only bound at top level by
# the retraining loop, so this exports the last model trained there rather
# than best_models[0] -- confirm that is the intended model.
model_json = model.to_json()
with open("model.json", "w") as architecture_file:
    architecture_file.write(model_json)
# Weights are written to a separate HDF5 file.
model.save_weights("model.h5")
print("Saved model to disk")

include validation data as part of final training with chosen hyperparameters

find best epoch

retrain on the training + validation data for the best epoch count, extended by a little because there is more data

use test data