In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from datetime import datetime
from packaging import version

import tensorflow as tf
import tensorflow.keras as keras


print("TensorFlow version: ", tf.__version__)
assert version.parse(tf.__version__).release[0] >= 2, \
    "This notebook requires TensorFlow 2.0 or above."

%load_ext tensorboard



# Run anaconda prompt as admin (search "anaconda prompt" in the search bar on windows and right click to run as admin)
# Run the following lines in the anaconda cmd to install missing dependencies
# Conda install keras
# If any other module is missing, try conda install "name of module"

# If anaconda is not installed, follow this guide: https://docs.anaconda.com/anaconda/install/windows/

TensorFlow version:  2.1.0


In [2]:
# Import all packages/libraries needed
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import glob
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from collections import Counter
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

from concurrent import futures
import threading

# The following is important for the model building itself
import keras.backend as K

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, TimeDistributed, Conv2D, MaxPooling2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.utils import to_categorical



directory = os.getcwd()

Using TensorFlow backend.


In [3]:
class directory:
    closed_v = os.path.join (directory , 'Closed_Valve')
    open_v = os.path.join (directory , 'Open_Valve')

In [4]:
class hypParam:
    batch_size = 8
    nr_epochs = 45
    verbose = 1
    lr = 0.00001

In [5]:
class modelName:
    model_name = 'mitral_valve_model.h5'

In [6]:
# The dataset is "labeled" in the sense that there are two folders which contain pictures of a closed valve and an open valve
# This snippet of code will navigate to those folders and read in all the pictures in jpg format
Closed_Valve = glob.glob (directory.closed_v + '/*.jpg' )
Open_Valve = glob.glob (directory.open_v + '/*.jpg' )

In [7]:
# Create pandas dataframe, where file and label are used as input array and list respectively, in train_test_split.

df = pd.DataFrame ({
    'file' : Closed_Valve + Open_Valve,
    'label' : ['Closed_Valve'] * len (Closed_Valve)  +  ['Open_Valve'] * len (Open_Valve) 
     }). sample (frac = 1 , random_state = 0 ). reset_index (drop = True)

In [8]:
# Gather the image from its directory and insert into main memory for subsequent processing 
# Split the dataset into three parts.
# 60 percent of the whole dataset is reserved as a training set, while the testing and validation set are 20 percent each


x_train, y_train, x_test, y_test = train_test_split(df['file'].values,
                                                                      df['label'].values, 
                                                                      test_size=0.2, random_state=42)

x_train, y_val, x_test, y_val_test = train_test_split(x_train, x_test, 
                                                                    test_size=0.2, random_state=42)

# In order - nr of samples, number pictures in validation set and nr of pictures in test set
print(x_train.shape, y_val.shape, y_train.shape)
print('Train:', Counter(x_test), '\nValidation Set:', Counter(y_val_test), '\n Test Set:', Counter(y_test))

# A quick google search yielded the dimensions 224x224 for width and height as default input for CNN models
img_dims = (224, 224)

# Accelarating data loading by reading in image data on parallell threads. idx = index
def get_img_data_parallel(idx, img, total_imgs):
    if idx % 100 == 0 or idx == (total_imgs - 1):
        # Utilize the threading.current_thread() function in order to determine which thread has performed this task.
        print('{}: Processing image number: {}'.format(threading.current_thread().name,
                                                  idx))
    img = cv2.imread(img)
    
    # There are several interpolation methods, this uses a bicubic interpolation over 4×4 pixel neighborhood
    img = cv2.resize(img, dsize=img_dims, 
                     interpolation=cv2.INTER_CUBIC)
    
    # Reads the image in as an array
    img = np.array(img, dtype=np.float32)
    return img

# https://tutorialedge.net/python/concurrency/python-threadpoolexecutor-tutorial/
# Instantiating our own threadpoolexecutor
executor = futures.ThreadPoolExecutor(max_workers=None)


# This returns a list with images and their corresponding indexes
# Enumerate: for each index and image Iterate over indices and items of a list
x_data_inp = [(idx, img, len(x_train)) for idx, img in enumerate(x_train)]
y_val_inp = [(idx, img, len(y_val)) for idx, img in enumerate(y_val)]
test_data_inp = [(idx, img, len(y_train)) for idx, img in enumerate(y_train)]


(144,) (36,) (46,)
Train: Counter({'Open_Valve': 76, 'Closed_Valve': 68}) 
Validation Set: Counter({'Open_Valve': 19, 'Closed_Valve': 17}) 
 Test Set: Counter({'Open_Valve': 26, 'Closed_Valve': 20})


In [9]:
# Load training images into main memory

print('\nLoading Train Images:\n')
# "executor.map() function returns results in the same order as the list of data we gave it to process"
x_data_map = executor.map(get_img_data_parallel, 
                        [record[0] for record in x_data_inp],
                        [record[1] for record in x_data_inp],
                        [record[2] for record in x_data_inp])
x_data = np.array(list(x_data_map))


Loading Train Images:

ThreadPoolExecutor-0_0: Processing image number: 0
ThreadPoolExecutor-0_31: Processing image number: 100
ThreadPoolExecutor-0_1: Processing image number: 143


In [10]:
# Load the validation images into the main memory 
print('\nLoading Validation Images:\n')

# Quote on executor.map(): "It does all the hard work of splitting up the list, 
# sending the sub-lists off to each child process, running the child processes, 
# and combining the results"
y_val_map = executor.map(get_img_data_parallel, 
                        [record[0] for record in y_val_inp],
                        [record[1] for record in y_val_inp],
                        [record[2] for record in y_val_inp])
y_val_data = np.array(list(y_val_map))


Loading Validation Images:

ThreadPoolExecutor-0_17: Processing image number: 0
ThreadPoolExecutor-0_8: Processing image number: 35


In [11]:
# Load test set into main memory
print('\nLoading Test Images:\n')
test_data_map = executor.map(get_img_data_parallel, 
                        [record[0] for record in test_data_inp],
                        [record[1] for record in test_data_inp],
                        [record[2] for record in test_data_inp])
test_data = np.array(list(test_data_map))



Loading Test Images:

ThreadPoolExecutor-0_67: Processing image number: 0
ThreadPoolExecutor-0_26: Processing image number: 45


In [12]:
# Scale/normalize images, pixels can have intensity up to 255
x_imgs_scaled = x_data / 255.
val_imgs_scaled = y_val_data / 255.

In [13]:
# Defining the CNN architechture - 4 convolutional layers and 1 fully connected layer
# Instantiating the model
model = keras.models.Sequential([
    keras.layers.Conv2D(16, kernel_size=(3, 3), activation='relu', input_shape=(224,224,3)),
    keras.layers.Conv2D(32, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Conv2D(128, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    
    keras.layers.Dense(1024, activation='relu'),
    
    keras.layers.Flatten(input_shape=x_data.shape[1:]),
    keras.layers.Dense(2, activation='softmax'),
    
])


In [14]:
# This normalizes labels. Documentation: https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html
le = LabelEncoder()
le.fit(x_test)
x_test_enc = le.transform(x_test)
y_val_test_enc = le.transform(y_val_test)
y_test_enc = le.transform(y_test)

In [25]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 222, 222, 16)      448       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 220, 220, 32)      4640      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 110, 110, 32)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 108, 108, 64)      18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 54, 54, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 52, 52, 128)       73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 26, 26, 128)       0

In [15]:
# Our labels are strings. This is not readable as is, so conversion into numeric form is essential
# In other words - converts a class vector (integers), to a binary class matrix
# Documentation: https://www.tensorflow.org/api_docs/python/tf/keras/utils/to_categorical


x_test_enc = to_categorical(x_test_enc)
y_val_test_enc = to_categorical(y_val_test_enc)
y_val_test_enc.shape
x_test_enc.shape

(144, 2)

In [16]:
class Adam:
    opt = keras.optimizers.Adam(learning_rate=hypParam.lr)

In [17]:
# Before the model can be trained, it needs to be compiled
# This will group layers into an object with training features
# Documentation: https://www.tensorflow.org/api_docs/python/tf/keras/Model
model.compile(loss='categorical_crossentropy',
              optimizer=Adam.opt,
              metrics=['accuracy'])


In [18]:
# Alternative to tensorboard in graphing architecture

#import pydotplus
#from keras.utils.vis_utils import pydot
#from keras.utils.vis_utils import model_to_dot
#keras.utils.vis_utils.pydot = pydot
#keras.utils.plot_model(model, "CNN_graph.png", show_shapes=True)

In [19]:

# Creates directory automatically with date and time stamp for easy navigation
# logdir = os.path.join('logs\\fit' + datetime.now().strftime("%Y%m%d-%H%M%S"))

logdir="logs\\fit" # + datetime.now().strftime("%Y%m%d-%H%M%S")

# Define the Keras TensorBoard callback.
summary = tf.keras.callbacks.TensorBoard(log_dir=logdir, histogram_freq=1, write_graph=True, write_images=False,
    update_freq='epoch',)

In [20]:
# Training the model
history = model.fit(x_imgs_scaled, x_test_enc, batch_size=hypParam.batch_size, epochs=hypParam.nr_epochs, verbose = hypParam.verbose, validation_data=(val_imgs_scaled, y_val_test_enc), callbacks = [summary])

Train on 144 samples, validate on 36 samples
Epoch 1/45
Epoch 2/45
Epoch 3/45
Epoch 4/45
Epoch 5/45
Epoch 6/45
Epoch 7/45
Epoch 8/45
Epoch 9/45
Epoch 10/45
Epoch 11/45
Epoch 12/45
Epoch 13/45
Epoch 14/45
Epoch 15/45
Epoch 16/45
Epoch 17/45
Epoch 18/45
Epoch 19/45
Epoch 20/45
Epoch 21/45
Epoch 22/45
Epoch 23/45
Epoch 24/45
Epoch 25/45
Epoch 26/45
Epoch 27/45
Epoch 28/45
Epoch 29/45
Epoch 30/45
Epoch 31/45
Epoch 32/45
Epoch 33/45
Epoch 34/45
Epoch 35/45
Epoch 36/45
Epoch 37/45
Epoch 38/45
Epoch 39/45
Epoch 40/45
Epoch 41/45
Epoch 42/45
Epoch 43/45
Epoch 44/45
Epoch 45/45


In [21]:
# Using the trained model to predict and calculate the accuracy of the model on the test set that has not been used yet

y_pred = model.predict(test_data)
y_pred = y_pred.argmax(axis=1)
accuracy_score(y_test_enc, y_pred)

0.9782608695652174

In [22]:
# Save the model, this allows for reusability
model.save(modelName.model_name)

In [23]:
# Print the classification report
target_names = ['Closed_Valve' , 'Open_Valve']
report=classification_report(y_test_enc,y_pred,target_names=target_names)
print(report)

              precision    recall  f1-score   support

Closed_Valve       0.95      1.00      0.98        20
  Open_Valve       1.00      0.96      0.98        26

    accuracy                           0.98        46
   macro avg       0.98      0.98      0.98        46
weighted avg       0.98      0.98      0.98        46



In [24]:
# This calls the tensorboard with the desired directory 
# If it times out during run, just wait a few minutes and run this cell once more

# If tensorboard has an error connecting to localhost - which it might have if you restart the computer (it did for me),
# follow these steps:
# 1) Run cmd
# Then type the following on two separate lines
# 2) taskkill /im tensorboard.exe /f
# 3) del /q %TMP%\.tensorboard-info\*

%tensorboard --logdir logs\\fit

Reusing TensorBoard on port 6006 (pid 9944), started 2:21:57 ago. (Use '!kill 9944' to kill it.)