In [None]:
import numpy as np
import os
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow import keras
from sklearn.model_selection import train_test_split
from keras import layers
from tensorflow.keras.applications.resnet50 import ResNet50
from keras.models import load_model

In [None]:
# Step 1. Load the training data from the .npy file
data = np.load('data_train.npy')

# Step 2. Load the corresponding labels from the .npy file
labels = np.load('labels_train_corrected.npy')

# Transpose so that the first axis indexes samples; the reshape below uses
# data.shape[0] as the sample count.
# NOTE(review): presumably the file stores one flattened image per column -- confirm.
data = data.T

# Reshape each flattened sample into a 300x300 image with 3 colour channels,
# giving a 4D array of shape (n_samples, 300, 300, 3)
data = data.reshape(data.shape[0], 300, 300, 3)
print(data.shape) # Print the shape of the reshaped data

# Hold out 30% of the data as the test set (seeded, so the hold-out set is stable across runs)
x_train_full, x_test, t_train_full, t_test = train_test_split(data, labels, test_size=0.3, random_state=0, shuffle=True)

# Reserve 20% of the remaining data for validation.
# The split is seeded as well: this notebook resumes training from a saved
# checkpoint, and an unseeded split would hand the resumed model a different
# validation set on every run -- samples it was previously validated on would
# end up in its training set (and vice versa), making val_loss optimistic.
x_train, x_valid, t_train, t_valid = train_test_split(x_train_full, t_train_full, test_size=0.2, random_state=0)

# Save the (un-normalized) test data and corresponding labels to .npy files
np.save('x_test.npy', x_test)
np.save('t_test.npy', t_test)

# Delete the large intermediates to free up memory
del data
del labels
del x_train_full
del t_train_full

(5933, 300, 300, 3)


In [None]:
# Step 3. Location of the saved model file ("best_model.h5"); it is both the
# file the best model is written to and the file training is resumed from.
checkpoint_path = "best_model.h5"

# Directory component of the checkpoint path (empty string for a bare file name)
checkpoint_dir = os.path.split(checkpoint_path)[0]

In [None]:
# Number of target classes used for the one-hot encoding below
NUM_CLASSES = 10

# Scale pixel values into [0, 1]: cast to half precision first, then divide by 255
x_train = np.divide(x_train.astype(np.float16), 255)
x_valid = np.divide(x_valid.astype(np.float16), 255)

# One-hot encode the integer class labels
# NOTE: this cell rebinds x_*/t_* in place, so it must be run exactly once per
# kernel session -- a second run would rescale and re-encode the data.
t_train = tf.keras.utils.to_categorical(t_train, num_classes=NUM_CLASSES)
t_valid = tf.keras.utils.to_categorical(t_valid, num_classes=NUM_CLASSES)


In [None]:
# Explicit image input; the shape (300, 300, 3) matches the reshaped training images
inputs = tf.keras.Input(shape=(300, 300, 3))

# Data augmentation is applied to the INPUT IMAGES, before the backbone.
# (Applying these layers to the backbone's output would flip/rotate the
# low-resolution feature maps instead of augmenting the images.)
# Both layers are only active during training and are identity at inference.
x = tf.keras.layers.RandomFlip("horizontal_and_vertical")(inputs)

# The rotation factor is a fraction of 2*pi, so 0.2 samples rotations
# in the range [-72, +72] degrees (not 0.2 radians).
x = tf.keras.layers.RandomRotation(0.2)(x)

# Create a ResNet50 base model with pre-trained weights from ImageNet, without the top (output) layer
# NOTE(review): the images are scaled to [0, 1] earlier in this notebook, whereas the
# ImageNet weights were trained with `resnet50.preprocess_input`; because the whole
# backbone is fine-tuned this still trains, but confirm the choice is intentional.
base_model = ResNet50(input_shape=(300,300,3), include_top=False, weights='imagenet')

# Fine-tune the whole backbone: all of its weights are updated during training
base_model.trainable = True

# Run the augmented images through the backbone
x = base_model(x)

# Apply global average pooling to reduce the spatial dimensions of the feature maps
x = tf.keras.layers.GlobalAveragePooling2D()(x)

# Apply a dropout layer with a rate of 0.4 to help prevent overfitting
x = tf.keras.layers.Dropout(0.4)(x)

# Add the final dense layer with 10 output units (for 10 classes) and a softmax activation function
outputs = tf.keras.layers.Dense(10, activation='softmax')(x)

# Create the final model: images -> augmentation -> ResNet50 -> classification head
model = tf.keras.Model(inputs=inputs, outputs=outputs)

2023-08-07 16:56:21.240335: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-08-07 16:56:21.751345: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 79111 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-80GB, pci bus id: 0000:47:00.0, compute capability: 8.0


In [None]:
# Step 4. Resume from the previously saved best model when one exists.
# On a fresh run (no checkpoint file yet) the newly built model is kept, so the
# notebook runs top to bottom without having to comment this cell out.
if os.path.isfile(checkpoint_path):
    model = load_model(checkpoint_path)
    print(f"Resuming from saved model: {checkpoint_path}")
else:
    print(f"No saved model at {checkpoint_path}; training the freshly built model")

In [None]:
# Training callbacks, both driven by the validation loss.

# Stop once 'val_loss' has gone 10 consecutive epochs without improving, and
# roll the model back to the weights of the best epoch seen in this run.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

# Write the full model (architecture + weights) to 'checkpoint_path' at the end
# of every epoch in which 'val_loss' improved on the best value so far.
best_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_loss',
    mode='auto',                    # direction (min/max) inferred from the metric name
    save_best_only=True,
    save_weights_only=False,
    save_freq='epoch',
    verbose=1,
    options=None,
    initial_value_threshold=None,   # no prior best: the first epoch always saves
)

callback = [early_stopping, best_checkpoint]

In [None]:
# Hyperparameters for the Nadam optimizer, collected in one place:
#   learning_rate - step size of each weight update
#   beta_1        - exponential decay rate of the first-moment estimates
#   beta_2        - exponential decay rate of the second-moment estimates
#   epsilon       - small constant guarding against division by zero
nadam_settings = dict(
    learning_rate=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07,
)

optimizer = tf.keras.optimizers.Nadam(**nadam_settings)

In [None]:
# Configure the model for training:
#   optimizer - the Nadam instance defined above
#   loss      - categorical cross-entropy (multi-class, one-hot encoded targets)
#   metrics   - classification accuracy, reported for training and validation
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train the model using the fit method:
# - x_train and t_train: The training data and corresponding labels
# - batch_size: The number of samples per gradient update, set to 16
# - epochs: The maximum number of passes over the training dataset, set to 50
#   (EarlyStopping may end training sooner)
# - validation_data: The validation data and corresponding labels, used to evaluate the model after each epoch
# - callbacks: The list of callbacks defined above (EarlyStopping and ModelCheckpoint).
#   `callback` is already a list, so it is passed as-is rather than wrapped in another list.
# The returned History object is kept so the loss/accuracy curves can be inspected afterwards.
history = model.fit(
    x_train, t_train,
    batch_size=16,
    epochs=50,
    validation_data=(x_valid, t_valid),
    callbacks=callback
)

Epoch 1/50


2023-08-07 16:56:40.032785: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8201
2023-08-07 16:56:42.209969: I tensorflow/stream_executor/cuda/cuda_blas.cc:1774] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 00001: val_loss improved from inf to 0.45539, saving model to best_model.h5


  layer_config = serialize_layer_fn(layer)


Epoch 2/50
Epoch 00002: val_loss improved from 0.45539 to 0.24021, saving model to best_model.h5
Epoch 3/50
Epoch 00003: val_loss did not improve from 0.24021
Epoch 4/50
Epoch 00004: val_loss improved from 0.24021 to 0.16874, saving model to best_model.h5
Epoch 5/50
Epoch 00005: val_loss did not improve from 0.16874
Epoch 6/50
Epoch 00006: val_loss improved from 0.16874 to 0.07160, saving model to best_model.h5
Epoch 7/50
Epoch 00007: val_loss improved from 0.07160 to 0.05473, saving model to best_model.h5
Epoch 8/50
Epoch 00008: val_loss did not improve from 0.05473
Epoch 9/50
Epoch 00009: val_loss did not improve from 0.05473
Epoch 10/50
Epoch 00010: val_loss did not improve from 0.05473
Epoch 11/50
Epoch 00011: val_loss did not improve from 0.05473
Epoch 12/50
Epoch 00012: val_loss did not improve from 0.05473
Epoch 13/50
Epoch 00013: val_loss did not improve from 0.05473
Epoch 14/50
Epoch 00014: val_loss did not improve from 0.05473
Epoch 15/50
Epoch 00015: val_loss did not improve

<keras.callbacks.History at 0x2b9f55ef5e50>