In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import cv2
import os

import tensorflow as tf
from tensorflow import keras as k
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import array_to_img
from tensorflow.keras.layers import Input, Dense, Flatten, Dropout, Conv2D, MaxPooling2D, GlobalAveragePooling2D, Activation, concatenate
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
import tensorflow_addons as tfa
import keras_tuner

# Expose only GPU index 0 to CUDA-based libraries running in this process.
# NOTE(review): this is assigned after tensorflow has already been imported above —
# confirm it still takes effect before the GPU is first initialised.
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [None]:
#Variables
# Hyper-parameters and input geometry shared by the cells below.
image_size = 256  # images are resized to image_size x image_size by the reader functions
in_channel_spec = 9  # channel count requested for the spectrogram input (three images are read per sample)
in_channel_tool = 3  # channel count requested for the tool-image input
num_classes = 3  # width of the one-hot labels and of the softmax output layer
learning_rate = 0.001  # passed to the AdamW optimizer
weight_decay = 0.0001  # passed to the AdamW optimizer
batch_size = 1  # batch size applied in generate_datasets
num_epochs = 100  # number of epochs passed to model.fit

In [None]:
#Functions

#Convert images to numpy array

def read_tools(file_paths, image_size, channels):
    """Load tool images into one normalised float32 array.

    Each file is read with ``cv2.imread`` and resized to
    ``image_size x image_size`` with bicubic interpolation. The stacked
    array is then divided by its own maximum value, so the scale factor
    depends on the set of files passed in a single call.

    Args:
        file_paths: iterable of image file paths.
        image_size: target height and width in pixels.
        channels: size of the last axis of the returned array; it has to
            agree with the channel count of the images that were read.

    Returns:
        ``np.float32`` array of shape
        ``(len(file_paths), image_size, image_size, channels)``. An empty
        ``file_paths`` yields an empty array of that shape.

    Raises:
        FileNotFoundError: if ``cv2.imread`` returns ``None`` for a path.
    """
    images = []

    for file_path in file_paths:
        img = cv2.imread(file_path)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # fail with the offending path instead of an opaque resize error.
            raise FileNotFoundError(f"Could not read image: {file_path}")
        images.append(
            cv2.resize(img, dsize=(image_size, image_size), interpolation=cv2.INTER_CUBIC)
        )

    if not images:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.zeros((0, image_size, image_size, channels), dtype=np.float32)

    images = np.asarray(images, dtype=np.float32)

    # Normalise by the maximum of this batch; skip when it is zero so an
    # all-black batch stays zero instead of becoming NaN.
    peak = np.max(images)
    if peak != 0:
        images = images / peak

    # Reshape to match Keras expectations: (samples, height, width, channels).
    return images.reshape(images.shape[0], image_size, image_size, channels)

def _read_spec_image(file_path, image_size):
    """Read one image with cv2 and resize it to ``image_size x image_size``."""
    img = cv2.imread(file_path)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise FileNotFoundError(f"Could not read image: {file_path}")
    return cv2.resize(img, dsize=(image_size, image_size), interpolation=cv2.INTER_CUBIC)


def read_specs(file_paths_x, file_paths_y, file_paths_z, image_size, channels):
    """Load X/Y/Z spectrogram images and stack them along the channel axis.

    For every sample the three images are read, resized to
    ``image_size x image_size`` and concatenated on the last axis in the
    order x, y, z, so every output pixel holds the channels of all three
    images at that position. The stacked array is then divided by its own
    maximum value.

    The previous implementation appended the three images as separate array
    entries and reshaped ``(3N, H, W, 3)`` to ``(N, H, W, 9)``, which
    interleaves pixels of the three images rather than stacking channels.
    NOTE(review): networks trained on arrays produced by that layout see a
    different pixel ordering here — confirm whether they need retraining.

    Args:
        file_paths_x: sequence of paths to the X images.
        file_paths_y: sequence of paths to the Y images (same length).
        file_paths_z: sequence of paths to the Z images (same length).
        image_size: target height and width in pixels.
        channels: expected size of the last axis after stacking.

    Returns:
        ``np.float32`` array of shape
        ``(len(file_paths_x), image_size, image_size, channels)``. Empty
        inputs yield an empty array of that shape.

    Raises:
        ValueError: if the three path sequences differ in length, or the
            stacked channel count does not equal ``channels``.
        FileNotFoundError: if an image cannot be read.
    """
    n_samples = len(file_paths_x)
    if len(file_paths_y) != n_samples or len(file_paths_z) != n_samples:
        raise ValueError(
            "file_paths_x, file_paths_y and file_paths_z must have the same length"
        )

    if n_samples == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.zeros((0, image_size, image_size, channels), dtype=np.float32)

    samples = []
    for sample_paths in zip(file_paths_x, file_paths_y, file_paths_z):
        per_axis = [_read_spec_image(path, image_size) for path in sample_paths]
        # Stack the three images as channels of one sample.
        samples.append(np.concatenate(per_axis, axis=-1))

    images = np.asarray(samples, dtype=np.float32)
    if images.shape[-1] != channels:
        raise ValueError(
            f"Stacked images have {images.shape[-1]} channels, expected {channels}"
        )

    # Normalise by the maximum of this batch; skip when it is zero so an
    # all-black batch stays zero instead of becoming NaN.
    peak = np.max(images)
    if peak != 0:
        images = images / peak

    return images


#Batch, optionally augment, and prefetch a dataset

def generate_datasets(images, labels, is_train=False):
    """Batch a dataset, optionally augment it, and enable prefetching.

    Reads the notebook-level ``batch_size``, ``auto`` and
    ``data_augmentation`` objects.

    Args:
        images: dataset object whose elements are ``(features, label)`` pairs.
        labels: not used by this function.
        is_train: when true, ``data_augmentation`` is mapped over the
            features of every batch.

    Returns:
        The batched (and, for training, augmented) dataset with prefetching.
    """
    batched = images.batch(batch_size)

    if not is_train:
        return batched.prefetch(auto)

    def _augment(features, label):
        # Only the features are transformed; the label passes through unchanged.
        return data_augmentation(features), label

    augmented = batched.map(_augment, num_parallel_calls=auto)
    return augmented.prefetch(auto)


#Model creation methods
#This function drops the final (classification) layer of each unimodal network, concatenates
#the two remaining feature outputs, and adds a GRU layer followed by a new softmax classification layer.
def multimodal_conv_build(model_tool, model_spec, gru_units=256, gru_dropout=0.1):
    """Fuse two Keras models into one two-input classifier.

    The output of the second-to-last layer of each model is taken as its
    feature vector. Both feature outputs are concatenated, reshaped to a
    one-step sequence, passed through a GRU and classified by a softmax
    layer with ``num_classes`` units (notebook-level variable).

    The feature width of the sequence is inferred with ``Reshape((1, -1))``
    rather than fixed at 1024, so the builder also works when the two
    feature outputs do not add up to exactly 1024 values.

    Args:
        model_tool: Keras model for the tool-image branch.
        model_spec: Keras model for the spectrogram branch.
        gru_units: number of GRU units (default 256).
        gru_dropout: dropout rate passed to the GRU (default 0.1).

    Returns:
        A ``k.Model`` taking ``[model_tool.input, model_spec.input]`` and
        producing the softmax output named ``"output_mult"``.
    """
    inputs1 = model_tool.input
    # NOTE(review): assigns the private `_name` attribute of the input tensor;
    # whether Keras uses it when matching dict-keyed inputs is not visible here.
    inputs1._name = "input_tool"
    inputs2 = model_spec.input
    inputs2._name = "input_spec"

    # layers[-2] skips the last layer of each network.
    tool_features = model_tool.layers[-2].output
    spec_features = model_spec.layers[-2].output

    merged = k.layers.Concatenate()([tool_features, spec_features])
    # -1 lets Keras infer the feature width from the concatenated tensor.
    sequence = layers.Reshape((1, -1))(merged)
    gru = layers.GRU(gru_units, dropout=gru_dropout)(sequence)
    output = layers.Dense(num_classes, activation="softmax", name="output_mult")(gru)
    return k.Model(inputs=[inputs1, inputs2], outputs=output)


#Run experiment

def launch_experiment(model):
    """Compile, train and evaluate ``model`` on the notebook-level datasets.

    Uses ``dataset_train`` / ``dataset_val`` for fitting and ``dataset_test``
    for the final evaluation, together with the notebook-level
    ``learning_rate``, ``weight_decay`` and ``num_epochs``. Weights are
    checkpointed to ``/tmp/checkpoint`` whenever validation accuracy improves
    and reloaded from there before evaluation.

    Args:
        model: Keras model to train.

    Returns:
        Tuple ``(history, model)``: the object returned by ``model.fit`` and
        the same model instance after the checkpointed weights were loaded.
    """
    checkpoint_filepath = "/tmp/checkpoint"

    model.compile(
        optimizer=tfa.optimizers.AdamW(
            learning_rate=learning_rate, weight_decay=weight_decay
        ),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )

    # Keep only the weights of the epoch with the best validation accuracy.
    best_weights_saver = k.callbacks.ModelCheckpoint(
        checkpoint_filepath,
        monitor="val_accuracy",
        save_best_only=True,
        save_weights_only=True,
    )

    fit_options = dict(
        validation_data=dataset_val,
        epochs=num_epochs,
        shuffle=False,
        callbacks=[best_weights_saver],
    )
    history = model.fit(dataset_train, **fit_options)

    # Evaluate with the checkpointed weights rather than the last-epoch ones.
    model.load_weights(checkpoint_filepath)
    _loss, accuracy = model.evaluate(dataset_test)
    print(f"Test accuracy: {round(accuracy * 100, 2)}%")

    return history, model

In [None]:
#Dataset acquisition
# Label tables for the three splits, indexed by the first CSV column (the sample id).
train_df = pd.read_csv('../Data/Labels/train.csv', index_col=0)
test_df = pd.read_csv('../Data/Labels/test.csv', index_col=0)
val_df = pd.read_csv('../Data/Labels/val.csv', index_col=0)

# Column name -> file path pattern; the sample id from the index fills the placeholder.
path_templates = {
    'tool': '../Data/Datasets/tool/{}.jpg',
    'spec_x': '../Data/Datasets/specX/{}.png',
    'spec_y': '../Data/Datasets/specY/{}.png',
    'spec_z': '../Data/Datasets/specZ/{}.png',
}

# Add the four path columns to every split.
for frame in (train_df, test_df, val_df):
    for column, template in path_templates.items():
        frame[column] = frame.index.map(template.format)

In [None]:
#Read tool images and spectrograms, convert them to NumPy arrays
# Splits are processed in the order train, test, val.
x_train_tool, x_test_tool, x_val_tool = (
    read_tools(frame.tool.values, image_size, in_channel_tool)
    for frame in (train_df, test_df, val_df)
)
x_train_spec, x_test_spec, x_val_spec = (
    read_specs(
        frame.spec_x.values,
        frame.spec_y.values,
        frame.spec_z.values,
        image_size,
        in_channel_spec,
    )
    for frame in (train_df, test_df, val_df)
)

#Bring the labels to an acceptable form
# Shift tool_label down by one, then one-hot encode to num_classes float32 columns.
labels_train, labels_test, labels_val = (
    tf.keras.utils.to_categorical(
        frame.tool_label.values - 1, num_classes, dtype='float32')
    for frame in (train_df, test_df, val_df)
)

In [None]:
#Create tensorflow dataset objects and attach augmentation to the train dataset
auto = tf.data.AUTOTUNE

def _to_dataset(tool_images, spec_images, labels):
    """Slice one split into ({'input_2_tool', 'input_2_spec'}, label) examples."""
    features = {'input_2_tool': tool_images, 'input_2_spec': spec_images}
    return tf.data.Dataset.from_tensor_slices((features, labels))

# Augmentation model: each modality gets a random crop followed by a random
# horizontal flip; inputs and outputs are keyed like the dataset features.
inputs1 = k.Input((image_size, image_size, in_channel_tool))
inputs2 = k.Input((image_size, image_size, in_channel_spec))
tool_crop = k.layers.RandomFlip("horizontal")(
    k.layers.RandomCrop(image_size, image_size)(inputs1)
)
spec_crop = k.layers.RandomFlip("horizontal")(
    k.layers.RandomCrop(image_size, image_size)(inputs2)
)

data_augmentation = k.Model(
    inputs={'input_2_tool': inputs1, 'input_2_spec': inputs2},
    outputs={'input_2_tool':tool_crop, 'input_2_spec':spec_crop},
)

# Only the training split is augmented; all splits are batched and prefetched.
dataset_train = generate_datasets(
    _to_dataset(x_train_tool, x_train_spec, labels_train), labels_train, is_train=True
)
dataset_val = generate_datasets(
    _to_dataset(x_val_tool, x_val_spec, labels_val), labels_val
)
dataset_test = generate_datasets(
    _to_dataset(x_test_tool, x_test_spec, labels_test), labels_test
)

In [None]:
#Load unimodal networks and freeze them
# compile=False: the models are loaded without their training configuration.
model_tool, model_spec = (
    k.models.load_model(model_path, compile=False)
    for model_path in (
        "../models/tool_aug_tool_opt.h5",
        "../models/spec_aug_tool_opt.h5",
    )
)

# Mark both networks as non-trainable.
for frozen_model in (model_spec, model_tool):
    frozen_model.trainable = False

In [None]:
#Rename layers to divide networks
def _append_layer_suffix(model, suffix):
    """Append ``suffix`` to every layer name of ``model`` that does not end with it yet."""
    for layer in model.layers:
        # Skip layers that already carry the suffix so re-running this cell
        # does not produce names such as "..._tool_tool"; the dataset feature
        # keys ('input_2_tool' / 'input_2_spec') rely on a single suffix.
        if not layer.name.endswith(suffix):
            layer._name = layer.name + suffix

_append_layer_suffix(model_tool, '_tool')
_append_layer_suffix(model_spec, '_spec')

In [None]:
#Build multimodal network
# Combine the two loaded networks into a single two-input classifier.
multimodal_conv = multimodal_conv_build(model_tool, model_spec)

In [None]:
#Show summary
# Print the layer-by-layer summary of the combined model.
multimodal_conv.summary()

In [None]:
#Run training
# launch_experiment compiles and fits the model, reloads the checkpointed weights,
# prints the test accuracy and returns (history, model).
history, model = launch_experiment(multimodal_conv)

In [None]:
#Plot accuracy history
# Training and validation accuracy per epoch on one set of axes.
fig, ax = plt.subplots()
for metric_name in ('accuracy', 'val_accuracy'):
    ax.plot(history.history[metric_name])
ax.set_title('model accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(['train', 'val'], loc='upper left')
plt.show()

In [None]:
#Plot loss history
# Training and validation loss per epoch on one set of axes.
fig, ax = plt.subplots()
for metric_name in ('loss', 'val_loss'):
    ax.plot(history.history[metric_name])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'val'], loc='upper left')
plt.show()

In [None]:
# np.interp replaces `scipy.interp`, a deprecated alias of numpy.interp that
# newer SciPy releases no longer provide.
from itertools import cycle
from sklearn.metrics import roc_curve, auc

# Predicted class scores for the test split and the matching one-hot labels.
y_score = model.predict(dataset_test)
y_test = labels_test

# Number of classes taken from the one-hot label width.
n_classes = y_test.shape[1]
# Plot linewidth.
lw = 2
# Compute ROC curve and ROC area for each class (one-vs-rest)
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Compute micro-average ROC curve and ROC area: every (sample, class) pair
# is treated as one binary decision.
fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

# First aggregate all false positive rates
all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

# Then interpolate all ROC curves at these points
mean_tpr = np.zeros_like(all_fpr)
for i in range(n_classes):
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])

# Finally average it and compute AUC
mean_tpr /= n_classes

fpr["macro"] = all_fpr
tpr["macro"] = mean_tpr
roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

# Plot all ROC curves
plt.figure(1)
plt.plot(fpr["micro"], tpr["micro"],
         label='micro-average ROC curve (area = {0:0.2f})'
               ''.format(roc_auc["micro"]),
         color='deeppink', linestyle=':', linewidth=4)

plt.plot(fpr["macro"], tpr["macro"],
         label='macro-average ROC curve (area = {0:0.2f})'
               ''.format(roc_auc["macro"]),
         color='navy', linestyle=':', linewidth=4)

colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
for i, color in zip(range(n_classes), colors):
    plt.plot(fpr[i], tpr[i], color=color, lw=lw,
             label='ROC curve of class {0} (area = {1:0.2f})'
             ''.format(i, roc_auc[i]))

# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], 'k--', lw=lw)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic multi-class')
plt.legend(loc="lower right")
plt.show()

In [None]:
from sklearn.metrics import classification_report
from sklearn import metrics

# Convert predicted scores and one-hot labels to class indices.
y_classes = np.argmax(y_score, axis=1)
labels_test_f1=np.argmax(labels_test, axis=1)
print(metrics.confusion_matrix(labels_test_f1, y_classes))
print(classification_report(labels_test_f1, y_classes))
# ROC/AUC for class index 2 versus the rest. The ranking score is the predicted
# score for class 2; the arg-max class index (0/1/2) is not a meaningful score,
# because it would rank "predicted class 1" above "predicted class 0".
fpr, tpr, thresholds = metrics.roc_curve(labels_test_f1, y_score[:, 2], pos_label=2)
print(metrics.auc(fpr, tpr))