In [1]:
import warnings
warnings.filterwarnings('ignore')
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import VGG16
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Input
import numpy as np
from keras import optimizers
from keras.callbacks import ModelCheckpoint
import cv2
from tqdm import tqdm
import random
import os
from sklearn.model_selection import train_test_split 
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelBinarizer

In [2]:
def prepare_datasets(test_size, validation_size, X, y, random_state=None, stratify=False):
    """Split already-loaded data into train, validation and test sets.

    The test split is taken from the full data first; the validation split is
    then taken from what remains, so ``validation_size`` is a fraction of the
    post-test remainder, not of the whole data set.

    :param test_size (float): Value in [0, 1] indicating percentage of data set to allocate to test split
    :param validation_size (float): Value in [0, 1] indicating percentage of train set to allocate to validation split
    :param X (array-like): Input samples; the first axis indexes samples
    :param y (array-like): Targets aligned with X along the first axis
    :param random_state (int or None): Seed forwarded to both splits. The default (None)
        keeps the original unseeded behaviour; pass an int for reproducible splits
    :param stratify (bool): When True, both splits preserve the class proportions of y.
        The default (False) keeps the original unstratified behaviour
    :return X_train (ndarray): Input training set
    :return X_validation (ndarray): Input validation set
    :return X_test (ndarray): Input test set
    :return y_train (ndarray): Target training set
    :return y_validation (ndarray): Target validation set
    :return y_test (ndarray): Target test set
    """
    # First carve the held-out test set out of the complete data.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        stratify=y if stratify else None,
    )

    # Then carve the validation set out of the remaining training portion.
    # Stratification must use the *remaining* targets, not the original y.
    X_train, X_validation, y_train, y_validation = train_test_split(
        X_train,
        y_train,
        test_size=validation_size,
        random_state=random_state,
        stratify=y_train if stratify else None,
    )

    return X_train, X_validation, X_test, y_train, y_validation, y_test

In [3]:
# Mount Google Drive so that the /content/drive/MyDrive/... paths used by the
# following cells (data directory, model output directory) resolve.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime, which would be why this import is not in the top import cell - confirm.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Root folder holding one sub-directory of images per class.
DATADIR = "/content/drive/MyDrive/data4"

# Sub-directory names; the list index is the integer class label
# (0 = "cough_i", 1 = "not cough").
CATEGORIES = ["cough_i", "not cough"]

training_data = []   # [resized_image, class_index] pairs
skipped_files = []   # (path, reason) for every file that could not be used

for category in CATEGORIES:
    path = os.path.join(DATADIR, category)  # directory of this class
    print(path)
    class_num = CATEGORIES.index(category)

    for img in tqdm(os.listdir(path)):
        img_path = os.path.join(path, img)
        try:
            img_array = cv2.imread(img_path)
            # cv2.imread signals an unreadable / non-image file by returning
            # None instead of raising, so check for it explicitly.
            if img_array is None:
                skipped_files.append((img_path, "cv2.imread returned None"))
                continue
            new_array = cv2.resize(img_array, (224, 224))  # uniform input size
        except Exception as e:
            # Loading stays best-effort (one bad file must not abort the run),
            # but the failure is recorded instead of being silently dropped.
            skipped_files.append((img_path, repr(e)))
            continue
        training_data.append([new_array, class_num])

# Make data loss visible: silently losing files skews the class balance.
if skipped_files:
    print("Skipped {} unreadable file(s):".format(len(skipped_files)))
    for skipped_path, reason in skipped_files[:10]:
        print("  {} ({})".format(skipped_path, reason))
    if len(skipped_files) > 10:
        print("  ... and {} more".format(len(skipped_files) - 10))

if not training_data:
    raise RuntimeError("No images could be loaded from {}".format(DATADIR))

# Mix the two classes; they were appended one class after the other.
random.shuffle(training_data)

X = []
y = []

for features, label in training_data:
    X.append(features)
    y.append(label)
X = np.array(X).reshape(-1, 224, 224, 3)
data = np.array(X, dtype="float32")
labels = np.array(y)
print(labels)

# perform one-hot encoding on the labels
lb = LabelBinarizer()
labels = lb.fit_transform(labels)
labels = to_categorical(labels)
print(labels)

# get train, validation, test splits
# (25% of all data for test, then 20% of the remainder for validation)
trainX, X_validation, testX, trainY, y_validation, testY = prepare_datasets(0.25, 0.2, data, labels)

/content/drive/MyDrive/data4/cough_i


100%|██████████| 1600/1600 [05:23<00:00,  4.95it/s]


/content/drive/MyDrive/data4/not cough


100%|██████████| 1600/1600 [05:22<00:00,  4.96it/s]


[0 1 0 ... 1 0 1]
[[1. 0.]
 [0. 1.]
 [1. 0.]
 ...
 [0. 1.]
 [1. 0.]
 [0. 1.]]


In [5]:
# --- Training hyper-parameters -------------------------------------------
batch_size = 20  # 40
epochs = 10  # 200

# --- Input geometry --------------------------------------------------------
# Width and height (in pixels) of the images fed to the network.
img_width, img_height = 224, 224

# Symbolic network input: height x width x 3 colour channels.
input_tensor = Input(shape=(img_height, img_width, 3))

# Number of output classes of the classifier head.
n_targets = 2

# --- Augmentation ------------------------------------------------------------
# Random geometric perturbations applied to the training images only;
# pixels exposed by a transform are filled from the nearest valid pixel.
aug = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    zoom_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest",
)

In [6]:
# Convolutional base: VGG16 with ImageNet weights and without its dense
# classifier (include_top=False), wired to the shared input tensor.
base_model = VGG16(weights='imagenet', include_top=False, input_tensor=input_tensor)
print('Model loaded.')
base_model.summary()

# Classifier head stacked on the flattened output of the convolutional base.
top_model = Sequential()
top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
top_model.add(Dense(256, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(n_targets, activation='softmax'))
top_model.summary()

# Full network = convolutional base followed by the classifier head.
model = Model(inputs=base_model.input, outputs=top_model(base_model.output))
model.summary()

# Freeze the first layers of the combined model so that only the remaining
# base layers and the head are trained.
num_layers_to_freeze = 15

for layer in model.layers[:num_layers_to_freeze]:
    layer.trainable = False

# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model.compile(optimizer=optimizers.SGD(learning_rate=1e-4, momentum=0.9),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# serialize model to JSON
model_json = model.to_json()
model_filename = "/content/drive/MyDrive/model_cough/vgg16_model_{}_frozen_layers.json".format(num_layers_to_freeze)

# Make sure the output directory exists before anything is written into it.
os.makedirs(os.path.dirname(model_filename), exist_ok=True)

with open(model_filename, "w") as json_file:
    json_file.write(model_json)

filepath = "/content/drive/MyDrive/model_cough/esc50_vgg16_stft_weights_train_last_2_base_layers_best.hdf5"

# The model is compiled with metrics=['accuracy'], so the validation metric is
# logged as 'val_accuracy'. Monitoring 'val_acc' never matches a logged key,
# and with save_best_only=True the checkpoint would then never be written.
best_model_checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
callbacks_list = [best_model_checkpoint]

# Train on augmented batches; validate on the untouched validation arrays.
model.fit(
    aug.flow(trainX, trainY, batch_size=batch_size),
    steps_per_epoch=len(trainX)//batch_size,
    epochs=epochs,
    validation_data=(X_validation, y_validation),
    validation_steps=len(X_validation)//batch_size,
    callbacks=callbacks_list)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Model loaded.
Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112,

<tensorflow.python.keras.callbacks.History at 0x7f4016133bd0>

In [7]:
# evaluate model on test set
test_loss, test_acc = model.evaluate(testX, testY, verbose=2)
print('\nTest accuracy:', test_acc)
model.save_weights(filepath)
model.save("/content/drive/MyDrive/model_cough/cough_detector.model", save_format="h5")

25/25 - 9s - loss: 0.0225 - accuracy: 0.9950

Test accuracy: 0.9950000047683716
