In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import time

In [20]:
dir = 'data/'
modelname = 'mobilenetv2'

In [3]:
def load_tfrecord_dataset(file_path):
    # Define a function to parse the TFRecord file
    feature_description = {
        'image': tf.io.FixedLenFeature([224 * 224 * 1], tf.float32),
        'label': tf.io.FixedLenFeature([], tf.int64)
    }

    def _parse_function(example_proto):
        # Parse the input tf.train.Example proto using the feature description
        parsed_features = tf.io.parse_single_example(example_proto, feature_description)
        image = tf.reshape(parsed_features['image'], [224, 224, 1])
        label = parsed_features['label']
        return image, label

    # Load and parse the dataset
    raw_dataset = tf.data.TFRecordDataset(file_path)
    parsed_dataset = raw_dataset.map(_parse_function, num_parallel_calls=tf.data.AUTOTUNE)
    return parsed_dataset

# Load the datasets from TFRecord files
ds_train = load_tfrecord_dataset(f'{dir}ds_train.tfrecord')
ds_val = load_tfrecord_dataset(f'{dir}ds_val.tfrecord')
ds_test = load_tfrecord_dataset(f'{dir}ds_test.tfrecord')

In [4]:
# Preprocess the datasets
def preprocess_dataset(dataset):
    # Apply preprocessing using a lambda function to convert grayscale to RGB and preprocess
    def refactor(image,label):
        return tf.keras.applications.mobilenet_v2.preprocess_input(tf.image.grayscale_to_rgb(image)), label
    dataset = dataset.map(refactor, num_parallel_calls=tf.data.AUTOTUNE)
    return dataset
# Apply preprocess to datasets
ds_train = preprocess_dataset(ds_train)
ds_val = preprocess_dataset(ds_val)
ds_test = preprocess_dataset(ds_test)

In [5]:
# Batch the datasets for training and evaluation
batch_size = 16
ds_train = ds_train.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)
ds_val = ds_val.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)
ds_test = ds_test.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)

In [6]:
# Example of building a MobileNet model
base_model = tf.keras.applications.MobileNetV2(input_shape=(224, 224, 3),
                                               include_top=False,
                                               weights='imagenet')
base_model.trainable = False
# Add custom layers for classification
global_avg_layer = tf.keras.layers.GlobalAveragePooling2D()
dense_layer = tf.keras.layers.Dense(3, activation='softmax')  # Multi-class classification for labels [0, 1, 2]
# Assemble the model
model = tf.keras.Sequential([
    base_model,
    global_avg_layer,
    dense_layer
])
# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [7]:
# Measure training time
start_time = time.time()
# Train the model
history = model.fit(ds_train,
          validation_data=ds_val,
          epochs=100)
# Measure and print runtime
print(f"Training time: {time.time() - start_time:.2f} seconds")

Epoch 1/100
     85/Unknown [1m11s[0m 70ms/step - accuracy: 0.4339 - loss: 1.2044

  self.gen.throw(typ, value, traceback)


[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 96ms/step - accuracy: 0.4350 - loss: 1.2019 - val_accuracy: 0.6882 - val_loss: 0.8187
Epoch 2/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 82ms/step - accuracy: 0.6215 - loss: 0.8226 - val_accuracy: 0.7353 - val_loss: 0.7803
Epoch 3/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 80ms/step - accuracy: 0.6679 - loss: 0.7692 - val_accuracy: 0.7294 - val_loss: 0.7526
Epoch 4/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 81ms/step - accuracy: 0.6985 - loss: 0.7332 - val_accuracy: 0.7118 - val_loss: 0.7314
Epoch 5/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 81ms/step - accuracy: 0.7141 - loss: 0.7045 - val_accuracy: 0.7353 - val_loss: 0.7150
Epoch 6/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 82ms/step - accuracy: 0.7154 - loss: 0.6802 - val_accuracy: 0.7235 - val_loss: 0.7015
Epoch 7/100
[1m85/85[0m [32m━━━━━━━━

In [8]:
# Evaluate the model
results = model.evaluate(ds_test)
print(f"Test Loss: {results[0]}, Test Accuracy: {results[1]}")

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 76ms/step - accuracy: 0.9173 - loss: 0.2858
Test Loss: 0.30072200298309326, Test Accuracy: 0.8941176533699036


In [17]:
# Initialize lists for true labels and predictions
y_true = []
y_pred = []
# Generate predictions for additional metrics
for images, labels in ds_test:
    preds = model.predict(images)
    y_true.extend(labels.numpy())
    y_pred.extend(np.argmax(preds, axis=1))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 548ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 193ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 204ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 185ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 201ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 221ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 184ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 210ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 186ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 206ms/step


In [18]:
from sklearn.preprocessing import label_binarize
# Convert lists to NumPy arrays for metric calculations
y_true_np = np.array(y_true)
y_pred_np = np.array(y_pred)
y_true_bin = label_binarize(y_true, classes=[0, 1, 2])
y_prob = model.predict(ds_test)

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 483ms/step


  self.gen.throw(typ, value, traceback)


In [21]:
np.save(f'outputs/y_true_np-{modelname}.npy',y_true_np)
np.save(f'outputs/y_pred_np-{modelname}.npy',y_pred_np)
np.save(f'outputs/y_true_bin-{modelname}.npy',y_true_bin)
np.save(f'outputs/y_prob-{modelname}.npy',y_prob)

In [24]:
from sklearn.metrics import classification_report, roc_auc_score, cohen_kappa_score, confusion_matrix
# Calculate confusion matrix
conf_matrix = confusion_matrix(y_true_np, y_pred_np)
df_matrix = pd.DataFrame(conf_matrix)
print(df_matrix)
# Calculate additional metrics
precision = tf.keras.metrics.Precision()(y_true, y_pred).numpy()
recall = tf.keras.metrics.Recall()(y_true, y_pred).numpy()
f1 = 2 * (precision * recall) / (precision + recall)
roc_auc = roc_auc_score(y_true, tf.keras.utils.to_categorical(y_pred, 3), multi_class='ovr')
df_metrics = pd.DataFrame({'test_loss': [results[0]],
                           'test_accurancy': [results[1]],
                           'precision': [precision],
                           'recall': [recall],
                           'f1': [2*(precision*recall) / (precision+recall)],
                           'roc_auc': [roc_auc],
                           'kappa': [cohen_kappa_score(y_true, y_pred)] })
print(df_metrics)

    0  1   2
0  78  0   6
1   1  7   1
2  10  0  67
   test_loss  test_accurancy  precision    recall        f1   roc_auc  \
0   0.300722        0.894118   0.925926  0.872093  0.898204  0.895551   

      kappa  
0  0.804635  


In [None]:
import matplotlib.pyplot as plt
# Extracting accuracy and loss values
df_history = pd.DataFrame(history.history)
accuracy = df_history['accuracy']
val_accuracy = df_history['val_accuracy']

In [13]:
# Generate the classification report
report = classification_report(y_true, y_pred, output_dict=True)
# Convert the report to a DataFrame
df_report = pd.DataFrame(report).transpose()
# Save the DataFrame to a CSV file
df_report

Unnamed: 0,precision,recall,f1-score,support
0,0.876404,0.928571,0.901734,84.0
1,1.0,0.777778,0.875,9.0
2,0.905405,0.87013,0.887417,77.0
accuracy,0.894118,0.894118,0.894118,0.894118
macro avg,0.92727,0.858826,0.88805,170.0
weighted avg,0.896083,0.894118,0.893834,170.0


In [25]:
df_matrix.to_csv(f'outputs/matrix-{modelname}.csv')
df_report.to_csv(f'outputs/report-{modelname}.csv')
df_metrics.to_csv(f'outputs/metrics-{modelname}.csv')
df_history.to_csv(f'outputs/history-{modelname}.csv')

In [23]:
# Export our model to HDF5 file
model.save(f"models/{modelname}.h5")

