In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import time
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score, cohen_kappa_score, confusion_matrix

In [17]:
dir = 'data/'
modelname='nasnetmobile0'

In [3]:
def load_tfrecord_dataset(file_path):
    # Define a function to parse the TFRecord file
    feature_description = {
        'image': tf.io.FixedLenFeature([224 * 224 * 1], tf.float32),
        'label': tf.io.FixedLenFeature([], tf.int64)
    }

    def _parse_function(example_proto):
        # Parse the input tf.train.Example proto using the feature description
        parsed_features = tf.io.parse_single_example(example_proto, feature_description)
        image = tf.reshape(parsed_features['image'], [224, 224, 1])
        label = parsed_features['label']
        return image, label

    # Load and parse the dataset
    raw_dataset = tf.data.TFRecordDataset(file_path)
    parsed_dataset = raw_dataset.map(_parse_function, num_parallel_calls=tf.data.AUTOTUNE)
    return parsed_dataset

# Load the datasets from TFRecord files
ds_train = load_tfrecord_dataset(f'{dir}ds_train.tfrecord')
ds_val = load_tfrecord_dataset(f'{dir}ds_val.tfrecord')
ds_test = load_tfrecord_dataset(f'{dir}ds_test.tfrecord')

In [4]:
# Preprocess the datasets
def preprocess_dataset(dataset):
    # Apply preprocessing using a lambda function to convert grayscale to RGB and preprocess
    def refactor(image,label):
        # Convert grayscale images to RGB
        image = tf.image.grayscale_to_rgb(image)
        # Apply preprocessing
        image = tf.keras.applications.nasnet.preprocess_input(image)
        return image, label
    dataset = dataset.map(refactor, num_parallel_calls=tf.data.AUTOTUNE)
    return dataset
# Apply preprocess to datasets
ds_train = preprocess_dataset(ds_train)
ds_val = preprocess_dataset(ds_val)
ds_test = preprocess_dataset(ds_test)

In [5]:
# Batch the datasets for training and evaluation
batch_size = 16
ds_train = ds_train.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)
ds_val = ds_val.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)
ds_test = ds_test.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)

In [22]:
# Build base_model
base_model = tf.keras.applications.NASNetMobile(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
base_model.trainable = False
# Build the model
model = tf.keras.Sequential()
model.add(base_model)
model.add(tf.keras.layers.GlobalAveragePooling2D())
model.add(tf.keras.layers.Dense(3, activation='softmax')) # Multi-class classification for labels [0, 1, 2]
# Unfreeze some layers in the base model for fine-tuning
base_model.trainable = True
fine_tune_at = len(base_model.layers) - 20  # Unfreeze the last 20 layers
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False
# Compile the model
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.0001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'])

In [23]:
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

In [24]:
# Measure training time
start_time = time.time()
# Train the model
history = model.fit(
    ds_train,
    validation_data = ds_val,
    epochs=100,
    callbacks=[early_stopping, lr_scheduler])
# Measure and print runtime
print(f"Training time: {time.time() - start_time:.2f} seconds")

Epoch 1/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 598ms/step - accuracy: 0.4142 - loss: 1.0934 - val_accuracy: 0.5118 - val_loss: 0.9720 - learning_rate: 1.0000e-04
Epoch 2/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 385ms/step - accuracy: 0.6219 - loss: 0.8423 - val_accuracy: 0.6235 - val_loss: 0.8602 - learning_rate: 1.0000e-04
Epoch 3/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 211ms/step - accuracy: 0.6526 - loss: 0.7980 - val_accuracy: 0.6235 - val_loss: 0.8045 - learning_rate: 1.0000e-04
Epoch 4/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 228ms/step - accuracy: 0.6761 - loss: 0.7639 - val_accuracy: 0.6412 - val_loss: 0.7773 - learning_rate: 1.0000e-04
Epoch 5/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 220ms/step - accuracy: 0.6961 - loss: 0.7346 - val_accuracy: 0.6353 - val_loss: 0.7668 - learning_rate: 1.0000e-04
Epoch 6/100
[1m85/85[0m [32m━━━━━━━━

In [None]:
# Unfreeze some layers in the base model for fine-tuning
base_model.trainable = True
fine_tune_at = len(base_model.layers) - 20  # Unfreeze the last 20 layers
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False
model.compile(
    optimizer=tf.keras.optimizers.Adam(.00025),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'])

In [None]:
# Measure training time
start_time = time.time()
# Train the model
history_fine = model.fit(
    ds_train,
    validation_data = ds_val,
    epochs=100,
    callbacks=[early_stopping, lr_scheduler])
# Measure and print runtime
print(f"Training time: {time.time() - start_time:.2f} seconds")

In [9]:
# Evaluate the model
results = model.evaluate(ds_test)
print(f"Test Loss: {results[0]}, Test Accuracy: {results[1]}")

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 192ms/step - accuracy: 0.9351 - loss: 0.3264
Test Loss: 0.350116103887558, Test Accuracy: 0.9117646813392639


In [10]:
# Initialize lists for true labels and predictions
y_true = []
y_pred = []
# Generate predictions for additional metrics
for images, labels in ds_test:
    preds = model.predict(images)
    y_true.extend(labels.numpy())
    y_pred.extend(np.argmax(preds, axis=1))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 12s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 211ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 206ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 200ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 195ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 240ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 202ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 210ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 188ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 198ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 13s/step


In [11]:
from sklearn.preprocessing import label_binarize
# Convert lists to NumPy arrays for metric calculations
y_true_np = np.array(y_true)
y_pred_np = np.array(y_pred)
y_true_bin = label_binarize(y_true, classes=[0, 1, 2])
y_prob = model.predict(ds_test)

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 1s/step


  self.gen.throw(typ, value, traceback)


In [12]:
# Calculate sklearn confusion matrix and classification report
conf_matrix = confusion_matrix(y_true_np, y_pred_np)
df_matrix = pd.DataFrame(conf_matrix)
df_report = pd.DataFrame(classification_report(y_true, y_pred, output_dict=True)).transpose()
print(df_matrix)
# Calculate additional keras metrics
precision = tf.keras.metrics.Precision()(y_true, y_pred).numpy()
recall = tf.keras.metrics.Recall()(y_true, y_pred).numpy()
f1 = 2 * (precision * recall) / (precision + recall)
roc_auc = roc_auc_score(y_true, tf.keras.utils.to_categorical(y_pred, 3), multi_class='ovr')
df_metrics = pd.DataFrame({'test_loss': [results[0]],
                           'valid_accurancy': [results[1]],
                           'precision': [precision],
                           'recall': [recall],
                           'f1': [2*(precision*recall) / (precision+recall)],
                           'roc_auc': [roc_auc],
                           'kappa': [cohen_kappa_score(y_true, y_pred)] })
print(df_metrics)

    0  1   2
0  75  0   9
1   0  7   2
2   4  0  73
   test_loss  valid_accurancy  precision    recall        f1   roc_auc  \
0   0.350116         0.911765   0.901099  0.953488  0.926554  0.908983   

     kappa  
0  0.83792  


In [13]:
# Extracting accuracy and loss values
df_history = pd.DataFrame(history.history)
# Generate the classification report and convert the report to a DataFrame
df_report = pd.DataFrame(classification_report(y_true, y_pred, output_dict=True)).transpose()

### Export Files

In [14]:
np.save(f'outputs/y_true_np-{modelname}.npy',y_true_np)
np.save(f'outputs/y_pred_np-{modelname}.npy',y_pred_np)
np.save(f'outputs/y_true_bin-{modelname}.npy',y_true_bin)
np.save(f'outputs/y_prob-{modelname}.npy',y_prob)

In [15]:
df_matrix.to_csv(f'outputs/matrix-{modelname}.csv')
df_report.to_csv(f'outputs/report-{modelname}.csv')
df_metrics.to_csv(f'outputs/metrics-{modelname}.csv')
df_history.to_csv(f'outputs/history-{modelname}.csv')

In [16]:
# Export our model to HDF5 file
model.save(f"models/{modelname}.keras")