# Model Evaluation
Using this notebook to evaluate all of the models

In [1]:
# model training using tensorflow
import numpy as np
import os
from matplotlib import pyplot

import pandas as pd

import tensorflow as tf

from tensorflow import keras

In [2]:
'''
Configuration Code
'''
def get_n_cores():
    """The NSLOTS variable, If NSLOTS is not defined throw an exception."""
    nslots = os.getenv("NSLOTS")
    if nslots is not None:
        return int(nslots)
    raise ValueError("Environment variable NSLOTS is not defined.")


os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

NUM_GPUS = len(tf.config.experimental.list_physical_devices("GPU"))
print("Num GPUs Available: ", NUM_GPUS)
if NUM_GPUS > 0:
    print(os.getenv("CUDA_VISIBLE_DEVICES"))

tf.config.set_soft_device_placement(True)
tf.keras.backend.set_floatx("float32")
tf.config.threading.set_intra_op_parallelism_threads(1)
tf.config.threading.set_inter_op_parallelism_threads(get_n_cores())

Num GPUs Available:  1
0


In [3]:
# pull the filenames of all experiments
models = [m for m in next(os.walk("models"))[1] if "experiment" in m]
models = sorted(models)
models

['experiment_001',
 'experiment_002',
 'experiment_003',
 'experiment_004',
 'experiment_005a',
 'experiment_005b']

In [4]:
'''
Try the test set on each model and gather data in list of dictionaries
'''

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

TEST_DIRECTORY = os.path.join(os.getcwd(), 'backup_data', 'formatted_data')

test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    TEST_DIRECTORY,
    seed=123,
    image_size=(224,224),
    shuffle=False
)

results = []

for m in models:
    model = keras.models.load_model("models/{}".format(m))
    
    predictions = np.argmax(model.predict(test_ds), axis=1)
    actual = np.concatenate([y for x, y in test_ds], axis=0)
    
    report = classification_report(actual, predictions, output_dict=True)
    cmatrix = confusion_matrix(actual, predictions)
    
    results.append({
        "name": m,
        "size": os.path.getsize("models/{}/saved_model.pb".format(m)),
        "layers": len(model.layers),
        "accuracy": report['accuracy'],
        "true_pos": cmatrix[1,1],
        "true_neg": cmatrix[0,0],
        "false_pos": cmatrix[1,0],
        "false_neg": cmatrix[0,1],
        "pos_recall": report['1']['recall'],
        "neg_recall": report['0']['recall'],
        "pos_precision": report['1']['precision'],
        "neg_precision": report['0']['precision'],
        "pos_f1-score": report['1']['f1-score'],
        "neg_f1-score": report['0']['f1-score'],
    })

Found 253 files belonging to 2 classes.


In [5]:
data = pd.DataFrame(results)
data

Unnamed: 0,name,size,layers,accuracy,true_pos,true_neg,false_pos,false_neg,pos_recall,neg_recall,pos_precision,neg_precision,pos_f1-score,neg_f1-score
0,experiment_001,108772,6,0.968379,151,94,4,4,0.974194,0.959184,0.974194,0.959184,0.974194,0.959184
1,experiment_002,137029,8,0.956522,148,94,7,4,0.954839,0.959184,0.973684,0.930693,0.964169,0.944724
2,experiment_003,178843,11,0.980237,154,94,1,4,0.993548,0.959184,0.974684,0.989474,0.984026,0.974093
3,experiment_004,425921,12,0.964427,150,94,5,4,0.967742,0.959184,0.974026,0.949495,0.970874,0.954315
4,experiment_005a,3966087,7,0.936759,144,93,11,5,0.929032,0.94898,0.966443,0.894231,0.947368,0.920792
5,experiment_005b,4110284,7,0.968379,149,96,6,2,0.96129,0.979592,0.986755,0.941176,0.973856,0.96


In [6]:
data = data.set_index("name")

In [7]:
data.to_csv("results.csv")