### Train model

In [None]:
import os 
import sys
import time
import numpy as np
import matplotlib.pyplot as plt
import efficientnet.keras as efn
import tensorflow as tf
import keras

# Run identity: CFG_NAME names the configuration and is reused for log/checkpoint file names
CFG_NAME = "eb0" # name of the configuration
# Project root is the parent of the current working directory
ROOT_DIR = os.path.abspath("../")

# Dataset folder under <ROOT_DIR>/datasets, with one sub-folder per split
DATASET_NAME = "isolated-beat-images" 
TRAIN_DATA_PATH, VALID_DATA_PATH, TEST_DATA_PATH = [
    os.path.join(ROOT_DIR, "datasets", DATASET_NAME, split)
    for split in ("train", "val", "test")
]


# Log path that stores all results of this configuration: <ROOT_DIR>/logs/<CFG_NAME>
LOG_PATH = os.path.join(ROOT_DIR, "logs", CFG_NAME)

# makedirs also creates the intermediate "logs" directory, and exist_ok=True
# replaces the racy "check, then mkdir" sequence with a single idempotent call
os.makedirs(LOG_PATH, exist_ok=True)

# Open log file (mode 'w' truncates any log left by a previous run).
# NOTE: nothing in this notebook closes the handle; call log_file.close() when done.
log_file = open("{}/{}_log.out".format(LOG_PATH, CFG_NAME), 'w')
# Uncomment to send everything printed below into the log file instead of the notebook
#sys.stdout = log_file

### Load dataset

In [None]:
import keras_preprocessing
from keras_preprocessing import image
from keras_preprocessing.image import ImageDataGenerator


def processing_func(img):
    """Placeholder per-image preprocessing hook.

    Currently an identity function: `img` is handed back untouched.
    """
    # do sth
    return img


# Size every image is resized to on load, and number of samples per batch
image_size = (128, 192)
batch_size = 64

# Random geometric augmentations applied to training images only
augmentation_options = dict(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
    preprocessing_function=None,
)

# Training pipeline: rescale pixel values by 1/255, then augment
training_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)

# Validation / test pipelines: rescale only, no augmentations
validation_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by every directory iterator (one-hot labels via class_mode='categorical')
flow_options = dict(
    target_size=image_size,
    class_mode='categorical',
    batch_size=batch_size,
)

train_generator = training_datagen.flow_from_directory(TRAIN_DATA_PATH, **flow_options)

validation_generator = validation_datagen.flow_from_directory(VALID_DATA_PATH, **flow_options)

# shuffle=False keeps the test batches in the same order as test_generator.classes
test_generator = test_datagen.flow_from_directory(TEST_DATA_PATH, shuffle=False, **flow_options)

In [None]:
# Pull one batch (images, labels) from the test generator and show both array shapes
x, y = next(iter(test_generator))
x.shape, y.shape

In [None]:
# Show the label vector and the image of the first sample in the previewed batch
print(y[0])
plt.imshow(x[0])

### Define model

In [None]:
from keras.layers import Input, Conv2D, Dense
from keras.models import Model
from keras.optimizers import Adam

def classification_network(input_shape=(128, 192, 3), num_classes=2, learning_rate=1e-5):
    """Build and compile an EfficientNetB0-based image classifier.

    The backbone is loaded with ImageNet weights, without its original top,
    and with global average pooling; a softmax `Dense` head is attached on
    top and every backbone layer is left trainable (full fine-tuning).

    Args:
        input_shape: shape of one input image passed to the backbone;
            defaults to the (128, 192, 3) shape used by this notebook.
        num_classes: number of output units of the softmax head.
        learning_rate: learning rate handed to the Adam optimizer.

    Returns:
        The compiled Keras `Model`.
    """
    # Pre-trained model
    base_model = efn.EfficientNetB0(
        weights='imagenet',
        include_top=False,
        pooling='avg',
        input_shape=input_shape,
    )

    # Add FC layer
    predictions = Dense(num_classes, activation='softmax', trainable=True)(base_model.output)

    # Unfreeze layers
    for layer in base_model.layers:
        layer.trainable = True

    # Build model
    model = Model(inputs=[base_model.input], outputs=[predictions])

    # Optimizer
    optim = Adam(lr=learning_rate)

    # Loss function: the head is a softmax over one-hot labels, so categorical
    # cross-entropy is the matching loss. For two classes it evaluates to the
    # same value as the previous 'binary_crossentropy', and unlike it, it stays
    # correct when num_classes > 2.
    loss_func = 'categorical_crossentropy'

    model.compile(optimizer=optim, loss=loss_func, metrics=['accuracy'])
    return model

# Drop any model left over from a previous run of this cell before building a new one
model = None
model = classification_network()
model.summary()

### Set callbacks

In [None]:
# Callbacks for best-checkpoint saving, learning-rate scheduling, logging and early stopping
checkpoint_path = '{}/{}.h5'.format(LOG_PATH, CFG_NAME)
history_csv_path = '{}/training.csv'.format(LOG_PATH)

# Keep only the weights with the highest validation accuracy seen so far
best_checkpoint_cb = keras.callbacks.ModelCheckpoint(
    checkpoint_path, verbose=1, monitor='val_accuracy', save_best_only=True, mode='max')

# Multiply the learning rate by 0.1 after 5 epochs without validation-accuracy improvement
reduce_lr_cb = keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy', factor=0.1, verbose=1, patience=5, mode='max')

# Append per-epoch metrics to a CSV file in the log directory
csv_logger_cb = keras.callbacks.CSVLogger(history_csv_path)

# Stop after 8 epochs without improvement and roll back to the best weights
early_stopping_cb = keras.callbacks.EarlyStopping(
    monitor='val_accuracy', verbose=1, patience=8, restore_best_weights=True)

callbacks = [best_checkpoint_cb, reduce_lr_cb, csv_logger_cb, early_stopping_cb]

### Class weights for data imbalance

In [None]:
# Define class weights for imbalanced data
from sklearn.utils.class_weight import compute_class_weight

# Integer class indices present in the training set
class_labels = np.unique(train_generator.classes)

# `classes` and `y` are passed by keyword: newer scikit-learn releases reject
# them as positional arguments, while older releases accept both forms.
balanced_weights = compute_class_weight('balanced', classes=class_labels, y=train_generator.classes)

# Keras documents `class_weight` as a dict mapping class index -> weight,
# so convert the returned array into that form (plain Python int/float).
class_weights = {int(label): float(weight) for label, weight in zip(class_labels, balanced_weights)}
print(class_weights)

### Train model

In [None]:
# Wall-clock timer around the whole training run
start_time = time.time()

# Train for at most 500 epochs; the callbacks decide when to stop and what to save
history = model.fit_generator(
    train_generator,
    validation_data=validation_generator,
    epochs=500,
    class_weight=class_weights,
    callbacks=callbacks,
    shuffle=True,
    verbose=1,
)

end_time = time.time()
elapsed_minutes = (end_time - start_time) // 60
print("--- Time taken to train : %s min ---" % elapsed_minutes)

In [None]:
# Plot and save accuracy and loss graphs individually
def _plot_train_val_curves(train_values, val_values, train_fmt, val_fmt, ylabel, legend_loc, out_path):
    """Draw one train-vs-validation curve pair on its own figure, save it, then show it."""
    # Start from a fresh figure so nothing drawn earlier can leak into this plot,
    # even on backends where plt.show() does not clear the current figure.
    fig = plt.figure()
    epochs = range(len(train_values))
    plt.plot(epochs, train_values, train_fmt)
    plt.plot(epochs, val_values, val_fmt)
    plt.ylabel(ylabel)
    plt.xlabel('Epoch')
    plt.legend(['train', 'val'], loc=legend_loc)
    plt.grid(True)
    plt.savefig(out_path, dpi=100)
    plt.show()
    # Release the figure once it has been saved and displayed
    plt.close(fig)


def plot_loss_accu(history):
    """Plot the loss and accuracy curves of a training run and save both as JPEG files.

    Args:
        history: object whose `.history` dict holds the per-epoch lists
            'loss', 'val_loss', 'accuracy' and 'val_accuracy'.

    Side effects:
        Writes `<CFG_NAME>_loss.jpg` and `<CFG_NAME>_acc.jpg` into LOG_PATH
        and displays each figure.
    """
    metrics = history.history

    _plot_train_val_curves(
        metrics['loss'], metrics['val_loss'], 'g', 'y',
        'Loss %', 'upper right',
        '{}/{}_loss.jpg'.format(LOG_PATH, CFG_NAME),
    )

    _plot_train_val_curves(
        metrics['accuracy'], metrics['val_accuracy'], 'r', 'b',
        'Accuracy %', 'lower right',
        '{}/{}_acc.jpg'.format(LOG_PATH, CFG_NAME),
    )

plot_loss_accu(history)
print("Done training and logging!")

### Load best weights and test model performance

In [None]:
from keras.models import load_model

# Location of the checkpoint file written during training
best_checkpoint_path = "{}/{}.h5".format(LOG_PATH, CFG_NAME)

# Drop the in-memory model first, then reload from disk without compiling
# (compile=False: the model is only used for prediction below)
model = None
model = load_model(best_checkpoint_path, compile = False)

In [None]:
from keras.utils import np_utils
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# Re-create the validation iterator with shuffle=False so that the order of
# the predictions matches the order of `validation_generator.classes`
validation_generator = validation_datagen.flow_from_directory(
    VALID_DATA_PATH,
    shuffle=False,
    batch_size=batch_size,
    class_mode='categorical',
    target_size=image_size,
)

# Ground truth: integer labels and their one-hot encoding
y_test_flat = validation_generator.classes
y_test = np_utils.to_categorical(y_test_flat, 2)

# Class probabilities, then the index of the highest score per sample
y_pred = model.predict_generator(validation_generator)
y_pred_flat = np.argmax(y_pred, axis=1)

assert y_test_flat.shape == y_pred_flat.shape, "Shape mismatch!"

# Accuracy (in percent)
acc = accuracy_score(y_test_flat, y_pred_flat) * 100
print("Accuracy :", acc)

# Per-class precision / recall / F1
target_names = ['0', '1']
validation_report = classification_report(y_test_flat, y_pred_flat, target_names=target_names)
print(validation_report)

In [None]:
# Display the shapes of the one-hot labels and the predicted scores side by side
y_test.shape, y_pred.shape

In [None]:
from sklearn.metrics import roc_auc_score

# ROC AUC of the predicted scores against the one-hot labels, scaled to percent
roc_auc_percent = roc_auc_score(y_test, y_pred) *100
print('Area under ROC curve : ', roc_auc_percent)

In [None]:
from sklearn.metrics import roc_curve, auc
# https://github.com/hasibzunair/uniformizing-3D/blob/master/graph_compare.ipynb

# ROC curve and its area for each class, keyed by class index
fpr, tpr, roc_auc = {}, {}, {}
for class_idx in range(2):
    fpr[class_idx], tpr[class_idx], _ = roc_curve(y_test[:, class_idx], y_pred[:, class_idx])
    roc_auc[class_idx] = auc(fpr[class_idx], tpr[class_idx])

# Micro-average ROC curve and area: all (label, score) pairs pooled together
fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_pred.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

# Class index reported and plotted below
cls = 1 # class name
positive_auc_percent = roc_auc[cls]*100

#print(roc_auc)
print("Area under the ROC curve for positive class:", positive_auc_percent)

# ROC curve of the selected class against the chance diagonal
plt.figure()
lw = 2 # line width
roc_label = 'ROC curve (area = {0:.2f}%)'.format(positive_auc_percent)
plt.plot(fpr[cls], tpr[cls], color='darkorange', lw=lw, label=roc_label)
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()

### Test performance

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# Test labels: integer class indices and their one-hot encoding
y_test_flat = test_generator.classes
y_test = np_utils.to_categorical(y_test_flat, 2)

# A batch was already drawn from test_generator earlier in this notebook.
# Rewind it so that prediction starts at the first sample and the outputs stay
# aligned with `test_generator.classes` (harmless if batches are fetched by index).
test_generator.reset()

# Make preds
y_pred = model.predict_generator(test_generator)
# Get labels from predictions: index of the highest score in each row
y_pred_flat = np.argmax(y_pred, axis=1)

assert y_test_flat.shape == y_pred_flat.shape, "Shape mismatch!"

# Accuracy (in percent)
acc = accuracy_score(y_test_flat, y_pred_flat) * 100
print("Accuracy :", acc)

# Classification report
target_names = ['0', '1']
print(classification_report(y_test_flat, y_pred_flat, target_names=target_names))

In [None]:
# Final marker, printed once every cell above has finished
print("------------------------------------End of script------------------------------------")