In [1]:
import os
import shutil
import numpy as np



# Define paths
data_dir = 'data'
train_dir = os.path.join(data_dir, 'train')
val_dir = os.path.join(data_dir, 'val')
test_dir = os.path.join(data_dir, 'test')

# Define the split ratios
train_ratio = 0.7
val_ratio = 0.2
# The test_ratio is implicitly defined as what's left after allocating for training and validation

# Get the classes
classes = [d for d in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, d)) and d not in ['train', 'val', 'test']]

# Split the data
for cls in classes:
    # Create directories for each class in train, val, and test directories
    os.makedirs(os.path.join(train_dir, cls), exist_ok=True)
    os.makedirs(os.path.join(val_dir, cls), exist_ok=True)
    os.makedirs(os.path.join(test_dir, cls), exist_ok=True)

    # List images in each class directory
    images = os.listdir(os.path.join(data_dir, cls))
    np.random.shuffle(images)  # Shuffle the images

    # Split images into train, val, and test
    train_count = int(train_ratio * len(images))
    val_count = int(val_ratio * len(images))

    for i, img in enumerate(images):
        if i < train_count:
            dest = os.path.join(train_dir, cls)
        elif i < train_count + val_count:
            dest = os.path.join(val_dir, cls)
        else:
            dest = os.path.join(test_dir, cls)

        # Move image
        shutil.move(os.path.join(data_dir, cls, img), os.path.join(dest, img))


In [7]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg19 import preprocess_input


train_datagen = ImageDataGenerator(rescale=1./255, preprocessing_function=preprocess_input)
val_datagen = ImageDataGenerator(rescale=1./255, preprocessing_function=preprocess_input)

train_generator = train_datagen.flow_from_directory(
        'data/train',
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical')

validation_generator = val_datagen.flow_from_directory(

        'data/val',
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical')

Found 17728 images belonging to 8 classes.
Found 5062 images belonging to 8 classes.


In [8]:
from tensorflow.keras.applications import VGG19
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model

base_model = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the layers of the base model
for layer in base_model.layers:
    layer.trainable = False

# Add custom layers
x = base_model.output
x = Flatten()(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(train_generator.num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [9]:
epochs_for_frozen_training_phase = 5

In [10]:
model.fit(
    train_generator,
    steps_per_epoch=train_generator.n // train_generator.batch_size,
    epochs=epochs_for_frozen_training_phase,
    validation_data=validation_generator,
    validation_steps=validation_generator.n // validation_generator.batch_size)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x1dafb6d3910>

In [11]:

import datetime
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f'./Results/skinModelAfter{epochs_for_frozen_training_phase}EpochsFrozenTraining_{timestamp}.h5'
model.save(filename)

  saving_api.save_model(


In [12]:
from tensorflow.keras.models import load_model

# Load the model
model = load_model(filename)

In [13]:
epochs_after_thawing_training_phase = 2


In [14]:

from tensorflow.keras.optimizers import Adam
# Unfreeze some layers in the base model
for layer in base_model.layers[-4:]:
    layer.trainable = True

learning_rate = 0.0001

# Recompile the model (necessary after making changes to layer.trainable)
# Reduce Learning rate of Adam to prevent more dramatic changes in the top (recently thawed) layers
model.compile(optimizer=Adam(learning_rate=learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])


# Continue training
model.fit(train_generator,
    steps_per_epoch=train_generator.n // train_generator.batch_size,
    epochs=epochs_after_thawing_training_phase,  # specify additional epochs
    validation_data=validation_generator,
    validation_steps=validation_generator.n // validation_generator.batch_size)

Epoch 1/2
Epoch 2/2


<keras.src.callbacks.History at 0x1dafb6d2860>

In [15]:

timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f'./Results/skinModelAfter{epochs_for_frozen_training_phase}EpochsOfThawedTraining{timestamp}.h5'
model.save(filename)

In [16]:
# Load the model
model = load_model(filename)

In [17]:
# Assuming the same preprocessing as training and validation
test_datagen = ImageDataGenerator(rescale=1./255, preprocessing_function=preprocess_input)

# Now define the test generator
test_generator = test_datagen.flow_from_directory(
    'data/test',
    target_size=(224, 224),
    batch_size=32,  # make sure batch_size is defined
    class_mode='categorical'
)

Found 2541 images belonging to 8 classes.


In [28]:

import io
from contextlib import redirect_stdout

dict_results = {}


test_loss, test_accuracy = model.evaluate(test_generator, steps=test_generator.n // test_generator.batch_size)
print("Test accuracy:", test_accuracy)

dict_results['test_accuracy'] = test_accuracy
dict_results['Optimization_Title'] = f"try training with {epochs_for_frozen_training_phase} frozen epochs under a learning_rate of 0.001 (adam optimizer) and {epochs_after_thawing_training_phase} epochs with a learning_rate of {learning_rate}"
dict_results['num_of_training_images_removed'] = 0
dict_results['num_of_training_images'] = train_generator.n
dict_results['num_of_validation_images_removed'] = 0
dict_results['num_of_validation_images'] = validation_generator.n
dict_results['num_of_inputs'] = model.input_shape
dict_results['test_loss'] = test_loss
dict_results['learning_rate'] = model.optimizer.learning_rate.numpy()

# Redirect console printout to io.StringIO to collect it as a string
summary_str = io.StringIO()
with redirect_stdout(summary_str):
    model.summary()

dict_results['model_summary'] = summary_str.getvalue()
dict_results['Optimizer_details'] = model.optimizer.get_config()
dict_results['batch_size'] = 32
dict_results['target_size'] = (224, 224)
dict_results['class_mode'] = 'categorical'

file_path = "./Results/optimization_results.txt"

 # Open the file in append mode
with open(file_path, "a") as file:
    # Append a line to the file
    # line_to_append = "This is a new line to append to the file."
    file.write(str(dict_results).replace(',', ',\n') + "\n")


Test accuracy: 0.6198576092720032


In [21]:
print(str(dict_results))

{'test_accuracy': 0.6182753443717957, 'Optimization_Title': 'try training with 5 frozen epochs under a learning_rate of 0.001 (adam optimizer) and 2 epochs with a learning_rate of 0.0001', 'num_of_training_images_removed': 0, 'num_of_training_images': 17728, 'num_of_validation_images_removed': 0, 'num_of_validation_images': 5062, 'num_of_inputs': (None, 224, 224, 3), 'test_loss': 1.0774253606796265, 'learning_rate': 1e-04, 'model_summary': None, 'Optimizer_details': {'name': 'Adam', 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'jit_compile': False, 'is_legacy_optimizer': False, 'learning_rate': 1e-04, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-07, 'amsgrad': False}, 'batch_size': 32, 'target_size': (224, 224), 'class_mode': 'categorical'}


In [29]:
print(str(dict_results).replace(',', ',\n'))


{'test_accuracy': 0.6198576092720032,
 'Optimization_Title': 'try training with 5 frozen epochs under a learning_rate of 0.001 (adam optimizer) and 2 epochs with a learning_rate of 0.0001',
 'num_of_training_images_removed': 0,
 'num_of_training_images': 17728,
 'num_of_validation_images_removed': 0,
 'num_of_validation_images': 5062,
 'num_of_inputs': (None,
 224,
 224,
 3),
 'test_loss': 1.0740506649017334,
 'learning_rate': 1e-04,
 224,
 224,
 3)]     0         \n                                                                 \n block1_conv1 (Conv2D)       (None,
 224,
 224,
 64)      1792      \n                                                                 \n block1_conv2 (Conv2D)       (None,
 224,
 224,
 64)      36928     \n                                                                 \n block1_pool (MaxPooling2D)  (None,
 112,
 112,
 64)      0         \n                                                                 \n block2_conv1 (Conv2D)       (None,
 112,
 112,
 12

In [25]:
# dict_results['model_summary'] = 
type(model.summary())


Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0   

NoneType

In [27]:
import io
from contextlib import redirect_stdout

summary_str = io.StringIO()
with redirect_stdout(summary_str):
    model.summary()

# Now summary_str.getvalue() contains the summary as a string
print(type(summary_str.getvalue()))


<class 'str'>


In [39]:
# Import our dependencies
# !pip install keras_tuner
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import keras_tuner as kt
import datetime
import shutil
import keras
import ast
import re

# Creates a new Sequential model with hyperparameter options
def create_model(hp):
    # nn_model = tf.keras.models.Sequential()
    base_model = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))



    # Freeze the layers of the base model
    for layer in base_model.layers:
        layer.trainable = False



    num_layers = hp.Int('num_layers', 1, 2)

    # activation = hp.Choice('activation_layer_input', ['relu', 'leaky_relu'])



    # Add custom layers
    x = base_model.output
    x = Flatten()(x)

    # Add new layers based on hyperparameters
    for i in range(1, num_layers):
        # Get the number of neurons and activation for this layer
        num_neurons = hp.Int(f'activation_of_layer_{i}_after_VGG19_base_model', min_value=512, max_value=2048, step=256)
        activation = hp.Choice(f'activation_of_layer_{i}', ['relu', 'leaky_relu'])

        # Add the Dense layer
        if activation == 'leaky_relu':
            x = Dense(num_neurons)(x)  # Add Dense layer without activation
            x = tf.keras.layers.LeakyReLU(alpha=0.01)(x)  # Add separate LeakyReLU layer
        else:
            x = Dense(num_neurons, activation=activation)(x)

    num_neurons_for_layer = hp.Int(f'num_neurons_for_layer_{num_layers}_afterVGG19_base_model',
            min_value=512,
            max_value=2048,
            step=256)
    activation = hp.Choice('activation_layer_input', ['relu',  'leaky_relu'])

    # Add custom layers
    # x = base_model.output
    # x = Flatten()(x)
    x = Dense(num_neurons_for_layer, activation=activation)(x)
    x = Dropout(0.5)(x)



    # output layer
    predictions = Dense(train_generator.num_classes, activation='softmax')(x)

    model = Model(inputs=base_model.input, outputs=predictions)


    lr = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')

    model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr), loss='categorical_crossentropy', metrics=['accuracy'])


    return model

In [43]:
# Delete old dirctory used by keras_tuner.Hyperband before a new keras_tuner.Hyperbad is made
def delete_directory_and_its_content(directory_name):

    # Use shutil.rmtree() to recursively delete directories and subdirectories
    try:
        shutil.rmtree(directory_name)
        print(f"Directory '{directory_name}' has been deleted successfully.")
    except OSError as e:
        print(f"Error: {e}")

In [44]:
max_epochs = 5

delete_directory_and_its_content('./untitled_project/')
tuner = kt.Hyperband(
            create_model,
            objective="val_accuracy",
            max_epochs=max_epochs,
            hyperband_iterations=8,
            seed=1)

Directory './untitled_project/' has been deleted successfully.


In [45]:

tuner.search(
    train_generator,
    steps_per_epoch=train_generator.n // train_generator.batch_size,
    epochs=max_epochs + 1,
    validation_data=validation_generator,
    validation_steps=validation_generator.n // validation_generator.batch_size
)

Trial 13 Complete [00h 40m 15s]
val_accuracy: 0.5320411324501038

Best val_accuracy So Far: 0.6392405033111572
Total elapsed time: 19h 24m 30s

Search: Running Trial #14

Value             |Best Value So Far |Hyperparameter
1                 |2                 |num_layers
768               |1792              |num_neurons_for_layer_1_afterVGG19_base_model
leaky_relu        |relu              |activation_layer_input
0.00079743        |0.00028406        |learning_rate
1024              |512               |activation_of_layer_1_after_VGG19_base_model
leaky_relu        |relu              |activation_of_layer_1
1792              |512               |num_neurons_for_layer_2_afterVGG19_base_model
2                 |5                 |tuner/epochs
0                 |2                 |tuner/initial_epoch
1                 |1                 |tuner/bracket
0                 |1                 |tuner/round

Epoch 1/2

KeyboardInterrupt: 

In [None]:

dict_results = {}
first_model = tuner.get_best_models(num_models=1)[0]
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

filename = f'./Results/skinModelAfter{epochs_for_frozen_training_phase}EpochsOfThawedTraining{timestamp}.h5'
first_model.save(filename)
test_loss, test_accuracy = first_model.evaluate(test_generator, steps=test_generator.n // test_generator.batch_size)
print("Test accuracy:", test_accuracy)

dict_results['test_accuracy'] = test_accuracy
dict_results['Optimization_Title'] = f"try training with Hyperband with {epochs_for_frozen_training_phase} frozen epochs under a learning_rate of 0.001 (adam optimizer) with up to 8 hyperband iterations and between 1 and 2 layers (not including output layer) BEYOND the base_model given by VGG19"
dict_results['num_of_training_images_removed'] = 0
dict_results['num_of_training_images'] = train_generator.n
dict_results['num_of_validation_images_removed'] = 0
dict_results['num_of_validation_images'] = validation_generator.n
dict_results['num_of_inputs'] = first_model.input_shape
dict_results['test_loss'] = test_loss
dict_results['learning_rate'] = first_model.optimizer.learning_rate.numpy()

# Redirect console printout to io.StringIO to collect it as a string
summary_str = io.StringIO()
with redirect_stdout(summary_str):
    first_model.summary()

dict_results['model_summary'] = summary_str.getvalue()
dict_results['Optimizer_details'] = first_model.optimizer.get_config()
dict_results['batch_size'] = 32
dict_results['target_size'] = (224, 224)
dict_results['class_mode'] = 'categorical'

file_path = "./Results/optimization_results.txt"

 # Open the file in append mode
with open(file_path, "a") as file:
    # Append a line to the file
    # line_to_append = "This is a new line to append to the file."
    file.write(str(dict_results).replace(', "', ',\n"') + "\n")