In [1]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

#from tensorflow.data.Dataset import load
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
import keras_tuner as kt
import pandas as pd

import os
from file_helpers import unpickle_from_file

2024-03-13 08:09:58.076145: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load the label list via the project helper; it is passed as `labels=` to the
# dataset loaders below.
# NOTE(review): presumably ordered to match the image file order that
# image_dataset_from_directory expects - confirm against the preprocessing step.
train_labels_sorted = unpickle_from_file(
    '../data/processed_data/train_labels_sorted.bin'
)

In [3]:
# Batching and image geometry; the validation loader in the next cell reuses
# these three names.
batch_size = 75
img_height = img_width = 227

# Training subset: with validation_split=0.15 this keeps the 85% of files that
# are not held out. The seed must match the validation loader so the two
# subsets do not overlap.
train_ds = tf.keras.utils.image_dataset_from_directory(
    '../data/processed_data/training_images',
    labels=train_labels_sorted,
    image_size=(img_height, img_width),
    batch_size=batch_size,
    validation_split=0.15,
    subset="training",
    seed=123,
)

Found 51957 files belonging to 8 classes.
Using 44164 files for training.


2024-03-13 08:10:04.136938: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [4]:
# Validation subset: the 15% of files held out from the same directory. The
# split fraction and seed are the same as for the training loader so the two
# subsets are complementary.
val_ds = tf.keras.utils.image_dataset_from_directory(
    '../data/processed_data/training_images',
    labels=train_labels_sorted,
    image_size=(img_height, img_width),
    batch_size=batch_size,
    validation_split=0.15,
    subset="validation",
    seed=123,
)

Found 51957 files belonging to 8 classes.
Using 7793 files for validation.


In [5]:
# Input-pipeline tuning: cache the loaded batches after the first pass and let
# tf.data prefetch upcoming batches while the model trains.
# NOTE(review): cache() without a filename keeps the data in memory - confirm
# the full image set fits in RAM on the training machine.
# NOTE(review): this cell rebinds train_ds/val_ds, so re-running it stacks a
# second cache/shuffle/prefetch stage on top of the first.
AUTOTUNE = tf.data.AUTOTUNE

train_ds = (
    train_ds
    .cache()
    .shuffle(1000)  # the dataset is already batched, so this shuffles batches
    .prefetch(buffer_size=AUTOTUNE)
)
val_ds = (
    val_ds
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

In [6]:
def model_builder(hp):
    """Build and compile the CNN classifier for one hyperparameter trial.

    Three hyperparameters are registered on ``hp``, in this order:
    ``units`` (width of the hidden Dense layer, 32 to 512 in steps of 32),
    ``learning_rate`` (Adam learning rate: 1e-2, 1e-3 or 1e-4) and
    ``dropout_rate`` (0.2, 0.3, 0.4 or 0.5).

    Args:
        hp: Hyperparameter container supplied by the tuner; must provide
            ``Int`` and ``Choice``.

    Returns:
        A compiled ``Sequential`` model that takes 227x227 RGB images and
        emits 8 logits (no softmax; the loss is built with
        ``from_logits=True``).
    """
    input_shape = (227, 227, 3)
    num_classes = 8

    # Keep the registration order units -> learning_rate -> dropout_rate.
    dense_units = hp.Int('units', min_value=32, max_value=512, step=32)
    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    # Dropout is not necessarily independent of the other settings, so it is
    # tuned together with them rather than fixed.
    dropout_rate = hp.Choice('dropout_rate', values=[0.2, 0.3, 0.4, 0.5])

    model = Sequential()
    # Scale raw pixel values from [0, 255] down to [0, 1].
    model.add(layers.Rescaling(1./255, input_shape=input_shape))
    # Three conv/pool stages with a doubling number of filters.
    for filters in (16, 32, 64):
        model.add(layers.Conv2D(filters, 3, padding='same', activation='relu'))
        model.add(layers.MaxPooling2D())
    model.add(layers.Dropout(dropout_rate))
    model.add(layers.Flatten())
    model.add(layers.Dense(dense_units, activation='relu'))
    model.add(layers.Dense(num_classes))

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'],
    )

    return model

In [7]:
# Hyperband search that maximizes validation accuracy. State is stored under
# my_dir/intro_to_kt; when that directory already holds a previous run the
# tuner reloads it instead of starting from scratch.
tuner = kt.Hyperband(
    model_builder,
    objective='val_accuracy',
    max_epochs=10,
    factor=3,
    directory='my_dir',
    project_name='intro_to_kt',
)

Reloading Tuner from my_dir/intro_to_kt/tuner0.json


In [8]:
# Early stopping on validation loss. `patience` is a number of epochs, not a
# loss threshold: training stops once val_loss has gone 2 consecutive epochs
# without improving.
stop_early = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
)

In [None]:
# Run the hyperparameter search.
# No `epochs` argument is passed: Hyperband assigns each trial its own epoch
# budget ("tuner/epochs", capped by the tuner's max_epochs) and overrides any
# value given here, so an explicit epochs=... would be silently ignored.
tuner.search(train_ds, validation_data=val_ds, callbacks=[stop_early])

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Report every hyperparameter that model_builder tunes, including dropout.
print(f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is {best_hps.get('units')}, the optimal learning rate for the optimizer
is {best_hps.get('learning_rate')}, and the optimal dropout rate is {best_hps.get('dropout_rate')}.
""")

Trial 10 Complete [01h 14m 07s]
val_accuracy: 0.7774926424026489

Best val_accuracy So Far: 0.8181701302528381
Total elapsed time: 12h 52m 56s

Search: Running Trial #11

Value             |Best Value So Far |Hyperparameter
96                |256               |units
0.001             |0.01              |learning_rate
0.3               |0.2               |dropout_rate
2                 |2                 |tuner/epochs
0                 |0                 |tuner/initial_epoch
2                 |2                 |tuner/bracket
0                 |0                 |tuner/round

Epoch 1/2
Epoch 2/2

In [None]:
# Train a fresh model with the best hyperparameters for 10 epochs and record
# which epoch reached the highest validation accuracy.
model = tuner.hypermodel.build(best_hps)
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
)

val_acc_per_epoch = history.history['val_accuracy']
# index() returns the first occurrence of the maximum; +1 converts the
# 0-based list position into a 1-based epoch number.
best_epoch = 1 + val_acc_per_epoch.index(max(val_acc_per_epoch))
print(f'Best epoch: {best_epoch}')

In [None]:
# Rebuild the model from the best hyperparameters and train it again, this
# time only for the number of epochs found to be best above.
hypermodel = tuner.hypermodel.build(best_hps)

history = hypermodel.fit(
    train_ds,
    validation_data=val_ds,
    epochs=best_epoch,
)

In [None]:
# Plot training vs. validation accuracy and loss for the most recent fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Derive the x-axis from the recorded history: no `epochs` variable exists in
# this notebook, and the number of epochs actually run (best_epoch, or fewer)
# is exactly the length of these lists.
epochs_range = range(len(acc))

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(8, 8))

ax_acc.plot(epochs_range, acc, label='Training Accuracy')
ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Training and Validation Accuracy')

ax_loss.plot(epochs_range, loss, label='Training Loss')
ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Training and Validation Loss')

plt.show()