## Evaluate
* Load the previously trained models
* Load the test data
* Evaluate the accuracy and inference speed of each model

In [1]:
from tensorflow.keras.utils import to_categorical

import numpy as np
from os.path import join
import pickle
import random

2025-01-16 20:05:25.291332: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-16 20:05:25.299246: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1737054325.308362  102648 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1737054325.311135  102648 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-16 20:05:25.320631: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

Global variables

In [2]:
# Directory holding the prepared data files
DATA_PATH = "data"
# Directory holding the pickled, previously trained models
MODELS_PATH = "models"

In [10]:
# Input image geometry: width, height and depth
input_width, input_height, input_depth = 300, 300, 3
input_shape = (input_width, input_height, input_depth)
# Length of a single image once flattened into a 1-D vector
input_dim = input_width * input_height * input_depth

# Number of classification classes
output_classes = 7

# Number of test images drawn for the quick evaluation subset
NUM_TEST_SAMPLES = 100

# Flag for rescaling the data (color) values; deliberately left off
scale = False

## Test data
Load the data prepared in the previous notebook

In [11]:
# Materialise every array of the archive while the file is still open,
# then pick out the inputs ('X') and the labels ('y').
with np.load(join(DATA_PATH, 'test_data.npz')) as data:
    data_dict = {key: data[key] for key in data.files}
X_test, y_test = data_dict['X'], data_dict['y']

Optionally subsample the test data for faster evaluation during try-outs

In [12]:
# Draw a smaller random subset of the test set (sampling without replacement).
# - range(len(X_test)) makes every index eligible, including the last sample.
# - The sample size is capped at the test set size, because random.sample
#   raises ValueError when asked for more items than the population holds.
# - A dedicated, seeded generator keeps the subset (and the accuracies
#   computed on it) identical across kernel restarts.
SUBSAMPLE_SEED = 42
subsample_rng = random.Random(SUBSAMPLE_SEED)
num_sub_samples = min(NUM_TEST_SAMPLES, len(X_test))
idx = subsample_rng.sample(range(len(X_test)), num_sub_samples)
X_test_sub = X_test[idx]
y_test_sub = y_test[idx]

# Flatten each image to a 1-D vector, as required by the dummy and SVM models
X_flat = [img.flatten() for img in X_test_sub]

## Test accuracy
Accuracy on the test dataset for the different models trained in previous notebooks

### Dummy model
Dummy (most-common-category) model, used as a baseline

In [13]:
# Restore the baseline model from disk.
# NOTE: pickle.load can execute arbitrary code; only load files you produced yourself.
with open(join(MODELS_PATH, 'dummy.pickle'), mode='rb') as model_file:
    dummy = pickle.load(model_file)

In [18]:
%%time
# Compute accuracy on the test set
accuracy = dummy.score(X_flat, y_test_sub)
print('Baseline accuracy: {:.3f}'.format(accuracy))

Baseline accuracy: 0.120
CPU times: user 2.84 ms, sys: 0 ns, total: 2.84 ms
Wall time: 2.55 ms


### SVM
Support Vector Machine, also applied to flattened images

In [15]:
# Restore the support-vector pipeline from disk.
# NOTE: pickle.load can execute arbitrary code; only load files you produced yourself.
with open(join(MODELS_PATH, 'pipe_svc.pickle'), mode='rb') as model_file:
    svn = pickle.load(model_file)

In [17]:
%%time
# Compute accuracy on the test set
accuracy = svn.score(X_flat, y_test_sub)
print('SVN accuracy: {:.3f}'.format(accuracy))

SVN accuracy: 0.230
CPU times: user 6min 28s, sys: 67.9 ms, total: 6min 28s
Wall time: 32.6 s


### CNN
Convolutional Neural Network model, applied to 2D image data

In [21]:
# Restore the convolutional network from disk.
# NOTE: pickle.load can execute arbitrary code; only load files you produced yourself.
with open(join(MODELS_PATH, 'cnn.pickle'), mode='rb') as model_file:
    cnn = pickle.load(model_file)

2025-01-16 20:21:18.656591: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [19]:
# Integer-encode the labels (this is NOT one-hot): each label is replaced by
# its position in the sorted array of unique label values found in y_test.
# NOTE(review): the indices only match the training encoding if every class
# occurs in y_test — confirm.
y_test_preprocessed = np.unique(y_test, return_inverse=True)[1]

In [23]:
%%time
(test_loss, test_accuracy) = cnn.evaluate(X_test, y_test_preprocessed, batch_size=32)

print('Test loss: {:.2f}'.format(test_loss)) 
print('Test accuracy: {:.2f}%'.format(100*test_accuracy)) 

[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 519ms/step - acc: 0.1555 - loss: 1.9443
Test loss: 1.94
Test accuracy: 15.54%
CPU times: user 9min 16s, sys: 372 ms, total: 9min 16s
Wall time: 1min 2s


### Transfer learning
Using MobileNetV3Small

In [26]:
# Restore the transfer-learning model from disk.
# NOTE: pickle.load can execute arbitrary code; only load files you produced yourself.
with open(join(MODELS_PATH, 'transfer_mobilenet.pickle'), mode='rb') as model_file:
    transfer_mobilenet = pickle.load(model_file)

In [27]:
# Step 1: replace each label by its index in the sorted unique label values.
# NOTE(review): indices only match the training encoding if every class
# occurs in y_test — confirm.
y_test_classes = np.unique(y_test, return_inverse=True)[1]
# Step 2: one-hot encode those indices over `output_classes` columns.
y_test_preprocessed = to_categorical(
    y_test_classes,
    num_classes=output_classes,
)

In [30]:
%%time
(test_loss, test_accuracy) = transfer_mobilenet.evaluate(X_test, y_test_preprocessed, batch_size=32)

print('Test loss: {:.2f}'.format(test_loss)) 
print('Test accuracy: {:.2f}%'.format(100*test_accuracy)) 

[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 175ms/step - accuracy: 0.6217 - loss: 1.7474
Test loss: 1.69
Test accuracy: 63.29%
CPU times: user 2min 43s, sys: 3.97 s, total: 2min 47s
Wall time: 21.4 s
