In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import time
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score, cohen_kappa_score, confusion_matrix

In [2]:
dir = 'data/'
modelname='inceptionv3'

In [3]:
def load_tfrecord_dataset(file_path):
    # Define a function to parse the TFRecord file
    feature_description = {
        'image': tf.io.FixedLenFeature([224 * 224 * 1], tf.float32),
        'label': tf.io.FixedLenFeature([], tf.int64)
    }

    def _parse_function(example_proto):
        # Parse the input tf.train.Example proto using the feature description
        parsed_features = tf.io.parse_single_example(example_proto, feature_description)
        image = tf.reshape(parsed_features['image'], [224, 224, 1])
        label = parsed_features['label']
        return image, label

    # Load and parse the dataset
    raw_dataset = tf.data.TFRecordDataset(file_path)
    parsed_dataset = raw_dataset.map(_parse_function, num_parallel_calls=tf.data.AUTOTUNE)
    return parsed_dataset

# Load the datasets from TFRecord files
ds_train = load_tfrecord_dataset(f'{dir}ds_train.tfrecord')
ds_val = load_tfrecord_dataset(f'{dir}ds_val.tfrecord')
ds_test = load_tfrecord_dataset(f'{dir}ds_test.tfrecord')

In [5]:
# Preprocess the datasets
def preprocess_dataset(dataset):
    # Apply preprocessing using a lambda function to convert grayscale to RGB and preprocess
    def refactor(image,label):
        # Convert grayscale images to RGB
        image = tf.image.grayscale_to_rgb(image)
        # Apply preprocessing
        image = tf.keras.applications.inception_v3.preprocess_input(image)
        return image, label
    dataset = dataset.map(refactor, num_parallel_calls=tf.data.AUTOTUNE)
    return dataset
# Apply preprocess to datasets
ds_train = preprocess_dataset(ds_train)
ds_val = preprocess_dataset(ds_val)
ds_test = preprocess_dataset(ds_test)

In [6]:
# Batch the datasets for training and evaluation
batch_size = 16
ds_train = ds_train.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)
ds_val = ds_val.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)
ds_test = ds_test.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)

In [10]:
# Build base_model
base_model = tf.keras.applications.InceptionV3(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
base_model.trainable = False
# Build the model
model = tf.keras.Sequential()
model.add(base_model)
model.add(tf.keras.layers.GlobalAveragePooling2D())
model.add(tf.keras.layers.Dense(3, activation='softmax')) # Multi-class classification for labels [0, 1, 2]
# Compile the model
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'])

In [8]:
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

In [11]:
# Measure training time
start_time = time.time()
# Train the model
history = model.fit(
    ds_train,
    validation_data = ds_val,
    epochs=100,
    callbacks=[early_stopping, lr_scheduler])
# Measure and print runtime
print(f"Training time: {time.time() - start_time:.2f} seconds")

Epoch 1/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 431ms/step - accuracy: 0.5668 - loss: 0.9287 - val_accuracy: 0.6941 - val_loss: 0.7907 - learning_rate: 0.0010
Epoch 2/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 310ms/step - accuracy: 0.6609 - loss: 0.7723 - val_accuracy: 0.7235 - val_loss: 0.7468 - learning_rate: 0.0010
Epoch 3/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 306ms/step - accuracy: 0.6940 - loss: 0.7184 - val_accuracy: 0.7235 - val_loss: 0.7145 - learning_rate: 0.0010
Epoch 4/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 300ms/step - accuracy: 0.7211 - loss: 0.6799 - val_accuracy: 0.7294 - val_loss: 0.6901 - learning_rate: 0.0010
Epoch 5/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 304ms/step - accuracy: 0.7345 - loss: 0.6480 - val_accuracy: 0.7353 - val_loss: 0.6719 - learning_rate: 0.0010
Epoch 6/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [22]:
# Unfreeze some layers in the base model for fine-tuning
base_model.trainable = True
fine_tune_at = len(base_model.layers) - 20  # Unfreeze the last 20 layers
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False
model.compile(
    optimizer=tf.keras.optimizers.Adam(.00025),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'])

In [23]:
# Measure training time
start_time = time.time()
# Train the model
history_fine = model.fit(
    ds_train,
    validation_data = ds_val,
    epochs=100,
    callbacks=[early_stopping, lr_scheduler])
# Measure and print runtime
print(f"Training time: {time.time() - start_time:.2f} seconds")

Epoch 1/100
     85/Unknown [1m25s[0m 174ms/step - accuracy: 0.7158 - loss: 1.0587

  self.gen.throw(typ, value, traceback)


[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 223ms/step - accuracy: 0.7158 - loss: 1.0595 - val_accuracy: 0.7824 - val_loss: 1.1384 - learning_rate: 2.5000e-04
Epoch 2/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 195ms/step - accuracy: 0.8508 - loss: 0.5744 - val_accuracy: 0.8471 - val_loss: 0.9361 - learning_rate: 2.5000e-04
Epoch 3/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 198ms/step - accuracy: 0.9269 - loss: 0.2017 - val_accuracy: 0.8294 - val_loss: 1.0568 - learning_rate: 2.5000e-04
Epoch 4/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 198ms/step - accuracy: 0.9517 - loss: 0.1343 - val_accuracy: 0.8588 - val_loss: 0.9446 - learning_rate: 2.5000e-04
Epoch 5/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 198ms/step - accuracy: 0.9715 - loss: 0.0855 - val_accuracy: 0.8824 - val_loss: 0.7720 - learning_rate: 2.5000e-04
Epoch 6/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━

In [12]:
# Evaluate the model
results = model.evaluate(ds_test)
print(f"Test Loss: {results[0]}, Test Accuracy: {results[1]}")

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 201ms/step - accuracy: 0.9239 - loss: 0.3078
Test Loss: 0.30718645453453064, Test Accuracy: 0.9176470637321472


In [13]:
# Initialize lists for true labels and predictions
y_true = []
y_pred = []
# Generate predictions for additional metrics
for images, labels in ds_test:
    preds = model.predict(images)
    y_true.extend(labels.numpy())
    y_pred.extend(np.argmax(preds, axis=1))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 226ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 234ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 222ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 223ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 230ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 241ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 223ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 222ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 201ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step


In [14]:
from sklearn.preprocessing import label_binarize
# Convert lists to NumPy arrays for metric calculations
y_true_np = np.array(y_true)
y_pred_np = np.array(y_pred)
y_true_bin = label_binarize(y_true, classes=[0, 1, 2])
y_prob = model.predict(ds_test)

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 657ms/step


  self.gen.throw(typ, value, traceback)


In [15]:
# Calculate sklearn confusion matrix and classification report
conf_matrix = confusion_matrix(y_true_np, y_pred_np)
df_matrix = pd.DataFrame(conf_matrix)
df_report = pd.DataFrame(classification_report(y_true, y_pred, output_dict=True)).transpose()
print(df_matrix)
# Calculate additional keras metrics
precision = tf.keras.metrics.Precision()(y_true, y_pred).numpy()
recall = tf.keras.metrics.Recall()(y_true, y_pred).numpy()
f1 = 2 * (precision * recall) / (precision + recall)
roc_auc = roc_auc_score(y_true, tf.keras.utils.to_categorical(y_pred, 3), multi_class='ovr')
df_metrics = pd.DataFrame({'test_loss': [results[0]],
                           'valid_accurancy': [results[1]],
                           'precision': [precision],
                           'recall': [recall],
                           'f1': [2*(precision*recall) / (precision+recall)],
                           'roc_auc': [roc_auc],
                           'kappa': [cohen_kappa_score(y_true, y_pred)] })
print(df_metrics)

    0  1   2
0  78  0   6
1   1  8   0
2   7  0  70
   test_loss  valid_accurancy  precision    recall        f1   roc_auc  \
0   0.307186         0.917647   0.928571  0.906977  0.917647  0.928169   

      kappa  
0  0.848908  


In [16]:
# Extracting accuracy and loss values
df_history = pd.DataFrame(history.history)
# Generate the classification report and convert the report to a DataFrame
df_report = pd.DataFrame(classification_report(y_true, y_pred, output_dict=True)).transpose()

### Export Files

In [17]:
np.save(f'outputs/y_true_np-{modelname}.npy',y_true_np)
np.save(f'outputs/y_pred_np-{modelname}.npy',y_pred_np)
np.save(f'outputs/y_true_bin-{modelname}.npy',y_true_bin)
np.save(f'outputs/y_prob-{modelname}.npy',y_prob)

In [18]:
df_matrix.to_csv(f'outputs/matrix-{modelname}.csv')
df_report.to_csv(f'outputs/report-{modelname}.csv')
df_metrics.to_csv(f'outputs/metrics-{modelname}.csv')
df_history.to_csv(f'outputs/history-{modelname}.csv')

In [20]:
# Export our model to HDF5 file
model.save(f"models/{modelname}.keras")