In [None]:
import sys
sys.path.insert(0, '../')

import flammkuchen as fl
import tensorflow as tf
import librosa
from neural_networks.src.dataloader import DataLoader
from src.conf_matrices import generate_confusion_matrix
from src.roc_curves import generate_roc_curve
from src.inference_time import get_inference_time
import matplotlib.pyplot as plt
from utils.params import Params
import copy

import efficientnet.tfkeras as efn
from neural_networks.src.ResNet import resnet

%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# Root directory under which every checkpoint listed below is located.
PATH = "../neural_networks/Results/"


def _checkpoint_path(signal_dir, model_dir, run_dir):
    """Return PATH + "<signal_dir>/<model_dir>/<run_dir>/Checkpoints/model.hdf5"."""
    return PATH + "/".join((signal_dir, model_dir, run_dir, "Checkpoints", "model.hdf5"))


# Audio checkpoints (stored below the "MIC" directory)
PATH_RN18_AUDIO = _checkpoint_path("MIC", "ResNet18", "20210830-133118")
PATH_RN34_AUDIO = _checkpoint_path("MIC", "ResNet34", "20210830-133509")
PATH_EFNB0_AUDIO = _checkpoint_path("MIC", "EfficientNetB0", "20210830-133837")
PATH_MNV2_AUDIO = _checkpoint_path("MIC", "MobileNetV2", "20210830-134514")
PATH_RNNAMOH_AUDIO = _checkpoint_path("MIC", "RNN_Amoh", "20210830-134906")
PATH_RNNBASIC_AUDIO = _checkpoint_path("MIC", "RNN_Basic", "20210830-135050")

# NSA checkpoints
PATH_RN18_NSA = _checkpoint_path("NSA", "ResNet18", "20210830-135221")
PATH_RN34_NSA = _checkpoint_path("NSA", "ResNet34", "20210830-135440")
PATH_EFNB0_NSA = _checkpoint_path("NSA", "EfficientNetB0", "20210830-135809")
PATH_MNV2_NSA = _checkpoint_path("NSA", "MobileNetV2", "20210830-140436")
PATH_RNNAMOH_NSA = _checkpoint_path("NSA", "RNN_Amoh", "20210830-140822")
PATH_RNNBASIC_NSA = _checkpoint_path("NSA", "RNN_Basic", "20210830-140953")

In [None]:
# Number of target classes; handed to both data loaders.
NB_CLASSES = 4

# Configuration object read from params.json.
params = Params("../neural_networks/params.json")

# Override applied before any test data is requested.
params.n_mels_cnn = 64


def _load_test_split(signal_type):
    """Select `signal_type` on the shared params and fetch that signal's test data.

    Returns the DataLoader together with the two values produced by its
    get_test_data() call.
    """
    params.signal_type = signal_type
    loader = DataLoader(params=params, nb_classes=NB_CLASSES)
    features, labels = loader.get_test_data()
    return loader, features, labels


# The "MIC" data is fetched before params.signal_type is switched to "NSA":
# both loaders receive the same mutable params object, so the order is kept.
data_loader_mic, X_test_mic, Y_test_mic = _load_test_split("MIC")
data_loader_nsa, X_test_nsa, Y_test_nsa = _load_test_split("NSA")

In [None]:
def evaluate_model(model, X_test, Y_test, batch_size=32):
    """Evaluate a compiled model on test data and return its accuracy.

    Args:
        model: Object exposing ``evaluate(X, Y, batch_size=...)`` that returns a
            two-element ``(loss, accuracy)`` result, e.g. a Keras model
            compiled with ``metrics=["accuracy"]``.
        X_test: Test inputs, forwarded unchanged to ``model.evaluate``.
        Y_test: Test targets, forwarded unchanged to ``model.evaluate``.
        batch_size: Batch size used for the evaluation. Defaults to 32, the
            value that used to be hard-coded.

    Returns:
        The second value returned by ``model.evaluate`` (the accuracy).

    Raises:
        ValueError: If ``model.evaluate`` returns a sequence whose length is
            not exactly two (a bare scalar raises ``TypeError`` instead).
    """
    # Only the accuracy is of interest to callers; the loss is discarded.
    _, test_acc = model.evaluate(X_test, Y_test, batch_size=batch_size)
    return test_acc

In [None]:
# Loss and optimizer instances that are passed to the compile() call of every
# model in the cells below.
# NOTE(review): binary cross-entropy is paired with multi-class softmax outputs
# (see the RNN cells) — confirm that this matches the training setup.
loss = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam()

# Collects the results of all models; each model cell adds its own keys.
d = {}

## ResNet18

In [None]:
def _load_resnet18(weights_path):
    """Create a resnet_18 model for 64x64x1 inputs, restore `weights_path`, compile it."""
    # NB_CLASSES replaces the literal 4 so the model stays consistent with the
    # class count given to the data loaders.
    model = resnet.resnet_18(num_classes=NB_CLASSES)
    model.build(input_shape=(None, 64, 64, 1))
    model.load_weights(weights_path)
    model.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"])
    return model


rn18_audio = _load_resnet18(PATH_RN18_AUDIO)
rn18_nsa = _load_resnet18(PATH_RN18_NSA)

# (model, test inputs with an added trailing axis, test labels) per signal.
# Index 0 is audio (MIC) and index 1 is NSA in every result list below.
rn18_runs = [
    (rn18_audio, X_test_mic[..., None], Y_test_mic),
    (rn18_nsa, X_test_nsa[..., None], Y_test_nsa),
]
# Shape passed as image_size to get_inference_time.
rn18_image_size = (1, 64, 64, 1)

d = dict(
    d,
    rn18_test_acc=[evaluate_model(net, x, y) for net, x, y in rn18_runs],
    rn18_inf_time_cpu=[
        get_inference_time(net, image_size=rn18_image_size, gpu=False)
        for net, _x, _y in rn18_runs
    ],
    rn18_inf_time_gpu=[
        get_inference_time(net, image_size=rn18_image_size, gpu=True)
        for net, _x, _y in rn18_runs
    ],
    rn18_preds=[net.predict(x) for net, x, _y in rn18_runs],
    # Only the audio model is counted; the NSA model is built by the same call.
    rn18_nb_params=rn18_audio.count_params(),
)

## ResNet34

In [None]:
def _load_resnet34(weights_path):
    """Create a resnet_34 model for 64x64x1 inputs, restore `weights_path`, compile it."""
    # NB_CLASSES replaces the literal 4 so the model stays consistent with the
    # class count given to the data loaders.
    model = resnet.resnet_34(num_classes=NB_CLASSES)
    model.build(input_shape=(None, 64, 64, 1))
    model.load_weights(weights_path)
    model.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"])
    return model


rn34_audio = _load_resnet34(PATH_RN34_AUDIO)
rn34_nsa = _load_resnet34(PATH_RN34_NSA)

# (model, test inputs with an added trailing axis, test labels) per signal.
# Index 0 is audio (MIC) and index 1 is NSA in every result list below.
rn34_runs = [
    (rn34_audio, X_test_mic[..., None], Y_test_mic),
    (rn34_nsa, X_test_nsa[..., None], Y_test_nsa),
]
# Shape passed as image_size to get_inference_time.
rn34_image_size = (1, 64, 64, 1)

d = dict(
    d,
    rn34_test_acc=[evaluate_model(net, x, y) for net, x, y in rn34_runs],
    rn34_inf_time_cpu=[
        get_inference_time(net, image_size=rn34_image_size, gpu=False)
        for net, _x, _y in rn34_runs
    ],
    rn34_inf_time_gpu=[
        get_inference_time(net, image_size=rn34_image_size, gpu=True)
        for net, _x, _y in rn34_runs
    ],
    rn34_preds=[net.predict(x) for net, x, _y in rn34_runs],
    # Only the audio model is counted; the NSA model is built by the same call.
    rn34_nb_params=rn34_audio.count_params(),
)

## EfficientNetB0

In [None]:
# Constructor arguments shared by both EfficientNetB0 instances; only the
# weight file differs between the audio and the NSA model.
efn_kwargs = dict(input_shape=(64, 64, 1), include_top=True, classes=NB_CLASSES)

efn_audio = efn.EfficientNetB0(weights=PATH_EFNB0_AUDIO, **efn_kwargs)
efn_audio.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"])

efn_nsa = efn.EfficientNetB0(weights=PATH_EFNB0_NSA, **efn_kwargs)
efn_nsa.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"])

# (model, test inputs with an added trailing axis, test labels) per signal.
# Index 0 is audio (MIC) and index 1 is NSA in every result list below.
efn_runs = (
    (efn_audio, X_test_mic[..., None], Y_test_mic),
    (efn_nsa, X_test_nsa[..., None], Y_test_nsa),
)
# Shape passed as image_size to get_inference_time.
efn_image_size = (1, 64, 64, 1)

d = {
    **d,
    "efn_test_acc": [evaluate_model(net, x, y) for net, x, y in efn_runs],
    "efn_inf_time_cpu": [
        get_inference_time(net, image_size=efn_image_size, gpu=False)
        for net, _x, _y in efn_runs
    ],
    "efn_inf_time_gpu": [
        get_inference_time(net, image_size=efn_image_size, gpu=True)
        for net, _x, _y in efn_runs
    ],
    "efn_preds": [net.predict(x) for net, x, _y in efn_runs],
    # Parameter count is taken from the audio model only.
    "efn_nb_params": efn_audio.count_params(),
}

## MobileNetV2

In [None]:
def _load_mobilenet_v2(weights_path):
    """Create a MobileNetV2 for 64x64x1 inputs from `weights_path` and compile it."""
    net = tf.keras.applications.MobileNetV2(
        input_shape=(64, 64, 1),
        include_top=True,
        weights=weights_path,
        classes=NB_CLASSES,
    )
    net.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"])
    return net


mnet_audio = _load_mobilenet_v2(PATH_MNV2_AUDIO)
mnet_nsa = _load_mobilenet_v2(PATH_MNV2_NSA)

# (model, test inputs with an added trailing axis, test labels) per signal.
# Index 0 is audio (MIC) and index 1 is NSA in every result list below.
mnet_runs = (
    (mnet_audio, X_test_mic[..., None], Y_test_mic),
    (mnet_nsa, X_test_nsa[..., None], Y_test_nsa),
)
# Shape passed as image_size to get_inference_time.
mnet_image_size = (1, 64, 64, 1)

d = {
    **d,
    "mnet_test_acc": [evaluate_model(net, x, y) for net, x, y in mnet_runs],
    "mnet_inf_time_cpu": [
        get_inference_time(net, image_size=mnet_image_size, gpu=False)
        for net, _x, _y in mnet_runs
    ],
    "mnet_inf_time_gpu": [
        get_inference_time(net, image_size=mnet_image_size, gpu=True)
        for net, _x, _y in mnet_runs
    ],
    "mnet_preds": [net.predict(x) for net, x, _y in mnet_runs],
    # Parameter count is taken from the audio model only.
    "mnet_nb_params": mnet_audio.count_params(),
}

## RNN Amoh (https://ieeexplore.ieee.org/abstract/document/7570164)

In [None]:
def _build_rnn_amoh():
    """Return a fresh, uncompiled instance of the GRU/LSTM stack used for both signals."""
    return tf.keras.Sequential([
        tf.keras.layers.GRU(128, input_shape=(64, 64), return_sequences=True),
        tf.keras.layers.GRU(64, return_sequences=True),
        tf.keras.layers.GRU(32, return_sequences=True),
        tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(64)),
        tf.keras.layers.LSTM(64),
        tf.keras.layers.Dense(256, activation='relu'),
        # NB_CLASSES replaces the literal 4 so the output size stays consistent
        # with the class count given to the data loaders.
        tf.keras.layers.Dense(NB_CLASSES, activation='softmax')
    ])


# Both models are created before any weights are loaded, audio first.
rnn_amoh_audio = _build_rnn_amoh()
rnn_amoh_nsa = _build_rnn_amoh()

rnn_amoh_audio.load_weights(PATH_RNNAMOH_AUDIO)
rnn_amoh_nsa.load_weights(PATH_RNNAMOH_NSA)

rnn_amoh_audio.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"])
rnn_amoh_nsa.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"])

# (model, test inputs, test labels) per signal; unlike the CNN cells, the
# inputs are passed without an extra trailing axis.
# Index 0 is audio (MIC) and index 1 is NSA in every result list below.
rnn_amoh_runs = [
    (rnn_amoh_audio, X_test_mic, Y_test_mic),
    (rnn_amoh_nsa, X_test_nsa, Y_test_nsa),
]
# Shape passed as image_size to get_inference_time.
rnn_amoh_image_size = (1, 64, 64)

d = dict(
    d,
    rnn_amoh_test_acc=[evaluate_model(net, x, y) for net, x, y in rnn_amoh_runs],
    rnn_amoh_inf_time_cpu=[
        get_inference_time(net, image_size=rnn_amoh_image_size, gpu=False)
        for net, _x, _y in rnn_amoh_runs
    ],
    rnn_amoh_inf_time_gpu=[
        get_inference_time(net, image_size=rnn_amoh_image_size, gpu=True)
        for net, _x, _y in rnn_amoh_runs
    ],
    rnn_amoh_preds=[net.predict(x) for net, x, _y in rnn_amoh_runs],
    # Only the audio model is counted; both models come from the same factory.
    rnn_amoh_nb_params=rnn_amoh_audio.count_params(),
)

## RNN Basic

In [None]:
def _build_rnn_basic():
    """Return a fresh, uncompiled three-layer LSTM stack with a softmax output."""
    lstm = tf.keras.layers.LSTM
    return tf.keras.Sequential([
        lstm(128, input_shape=(64, 64), return_sequences=True),
        lstm(64, return_sequences=True),
        lstm(32),
        tf.keras.layers.Dense(NB_CLASSES, activation='softmax'),
    ])


# Same statement order as before: build both, load both, compile both.
rnn_basic_audio = _build_rnn_basic()
rnn_basic_nsa = _build_rnn_basic()

rnn_basic_audio.load_weights(PATH_RNNBASIC_AUDIO)
rnn_basic_nsa.load_weights(PATH_RNNBASIC_NSA)

rnn_basic_audio.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"])
rnn_basic_nsa.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"])

# (model, test inputs, test labels) per signal; the inputs are passed without
# an extra trailing axis.
# Index 0 is audio (MIC) and index 1 is NSA in every result list below.
rnn_basic_runs = (
    (rnn_basic_audio, X_test_mic, Y_test_mic),
    (rnn_basic_nsa, X_test_nsa, Y_test_nsa),
)
# Shape passed as image_size to get_inference_time.
rnn_basic_image_size = (1, 64, 64)

d = {
    **d,
    "rnn_basic_test_acc": [evaluate_model(net, x, y) for net, x, y in rnn_basic_runs],
    "rnn_basic_inf_time_cpu": [
        get_inference_time(net, image_size=rnn_basic_image_size, gpu=False)
        for net, _x, _y in rnn_basic_runs
    ],
    "rnn_basic_inf_time_gpu": [
        get_inference_time(net, image_size=rnn_basic_image_size, gpu=True)
        for net, _x, _y in rnn_basic_runs
    ],
    "rnn_basic_preds": [net.predict(x) for net, x, _y in rnn_basic_runs],
    # Parameter count is taken from the audio model only.
    "rnn_basic_nb_params": rnn_basic_audio.count_params(),
}

## Save results using 'flammkuchen'

In [None]:
# Write the collected results dict to disk with flammkuchen. The file name is
# a relative path, so it is resolved against the current working directory.
results_file = "results_evaluate_models.vfp"
fl.save(results_file, d)