In [10]:
import os
import numpy as np
import cv2
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
from scipy import stats
import pickle
import pandas as pd
import seaborn as sn

from sklearn.metrics import accuracy_score, balanced_accuracy_score, precision_score, recall_score

DATA_DIR = 'covid19_pocus_ultrasound/data/cross_validation'
MODEL_DIR = 'covid19_pocus_ultrasound/models'

### Note: This is what the authors resized images to, we can play with this (or change it)
### all depends on what resolution our pretrained model uses 

IMG_WIDTH = 224
IMG_HEIGHT = 224


### This is which "fold" of the cross validation. Rn I just set this as the first one so we do normal
### 80/20 train/val split, but we can iterate over this for robust n-fold validation
FOLD = 0

In [11]:
### Data for getting images from https://github.com/jrosebr1/imutils/blob/master/imutils/paths.py
image_types = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")

def list_images(basePath, contains=None):
    # return the set of files that are valid
    return list_files(basePath, validExts=image_types, contains=contains)


def list_files(basePath, validExts=None, contains=None):
    # loop over the directory structure
    for (rootDir, dirNames, filenames) in os.walk(basePath):
        # loop over the filenames in the current directory
        for filename in filenames:
            # if the contains string is not none and the filename does not contain
            # the supplied string, then ignore the file
            if contains is not None and filename.find(contains) == -1:
                continue

            # determine the file extension of the current file
            ext = filename[filename.rfind("."):].lower()

            # check to see if the file is an image and should be processed
            if validExts is None or ext.endswith(validExts):
                # construct the path to the image and yield it
                imagePath = os.path.join(rootDir, filename)
                yield imagePath

In [12]:
print('Loading images...')
imagePaths = list(list_images(DATA_DIR))

train_labels, test_labels = [], []
train_data, test_data = [], []

print(imagePaths[0:5])

Loading images...
['covid19_pocus_ultrasound/data/cross_validation/split4/pneumonia/pneu-gred-6.gif_frame28.jpg', 'covid19_pocus_ultrasound/data/cross_validation/split4/pneumonia/pneu-gred-6.gif_frame14.jpg', 'covid19_pocus_ultrasound/data/cross_validation/split4/pneumonia/pneu-radiopaeda.mp4_frame210.jpg', 'covid19_pocus_ultrasound/data/cross_validation/split4/pneumonia/pneu-radiopaeda.mp4_frame170.jpg', 'covid19_pocus_ultrasound/data/cross_validation/split4/pneumonia/pneu-gred-6.gif_frame16.jpg']


In [13]:
# loop over folds
for imagePath in imagePaths:

    path_parts = imagePath.split(os.path.sep)
    
    # extract the split
    train_test = path_parts[-3][-1]
    
    # extract the class label from the filename
    label = path_parts[-2]
    
    # load the image, swap color channels, and resize it to be a fixed
    # 224x224 pixels while ignoring aspect ratio
    image = cv2.imread(imagePath)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))

    # update the data and labels lists, respectively
    if train_test == str(FOLD):
        test_labels.append(label)
        test_data.append(image)
    else:
        train_labels.append(label)
        train_data.append(image)

# Prepare data for model
print(
    f'\nNumber of training samples: {len(train_labels)} \n'
    f'Number of validation samples: {len(test_labels)}'
)



Number of training samples: 701 
Number of validation samples: 210


In [14]:
from collections import Counter

### ok so these are all strings.. great
print(Counter(train_labels).keys())
print(Counter(train_labels).values())

dict_keys(['pneumonia', 'regular', 'covid'])
dict_values([219, 195, 287])


In [15]:
from sklearn import preprocessing
labEncoder = preprocessing.LabelEncoder()

labEncoder.fit(np.unique(train_labels))

train_num_labels = labEncoder.transform(train_labels)
test_num_labels = labEncoder.transform(test_labels)

In [16]:
'''
Sets up train and val set, using one hot encoding
'''
from keras.utils import to_categorical

x_train = np.asarray(train_data)
y_train = to_categorical(np.asarray(train_num_labels))

x_val = np.asarray(test_data)
y_val = to_categorical(np.asarray(test_num_labels))

In [17]:
from keras.applications.vgg16 import VGG16
import tensorflow as tf

In [31]:
from keras.layers import GlobalAveragePooling2D, Dense
from keras.optimizers import SGD

base_model = tf.keras.applications.mobilenet_v2.MobileNetV2(include_top=False)
average_layer = tf.keras.layers.GlobalAveragePooling2D()
batch_norm = tf.keras.layers.BatchNormalization()
prediction_layer = tf.keras.layers.Dense(3, activation='softmax')

model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.Dropout(0.5),
    average_layer,
    batch_norm,
    prediction_layer
])

model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_224 (Model) (None, None, None, 1280)  2257984   
_________________________________________________________________
dropout_10 (Dropout)         (None, None, None, 1280)  0         
_________________________________________________________________
global_average_pooling2d_6 ( (None, 1280)              0         
_________________________________________________________________
batch_normalization (BatchNo (None, 1280)              5120      
_________________________________________________________________
dense_9 (Dense)              (None, 3)                 3843      
Total params: 2,266,947
Trainable params: 2,230,275
Non-trainable params: 36,672
_________________________________________________________________


In [32]:
model.fit(x=x_train, y=y_train, epochs = 15, validation_data=(x_val, y_val))

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x1843fe640>

In [33]:
'''
Evaluate on the val set, which is a bit sketch, but can be fixed when we get more data
'''
model.evaluate(x=x_val, y=y_val)




[1.7866321802139282, 0.7428571581840515]

In [34]:
y_pred = model.predict(x_val)

In [40]:
# confusion matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(test_num_labels, y_pred.argmax(axis=1))
print(cm)

[[87  2 23]
 [20 38  0]
 [ 8  1 31]]
