Basic CNN Training by using a basic spectogram

In [87]:
import librosa as lb
from librosa.display import waveplot
from librosa.display import specshow

from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D , Flatten
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

import matplotlib.pyplot as plt

from os import listdir
from os.path import isfile, join

In [45]:
#create images using librosa spectogram
def convert_to_spec_image(file):
    ''' 
        Converts audio file to spec image
        Input file includes path
        Saves the file to a png image in the same directory
    '''
    y, sr = lb.load(file)

    #Plot signal in
    plt.figure(figsize=(10,3))
    src_ft = lb.stft(y)
    src_db = lb.amplitude_to_db(abs(src_ft))
    specshow(src_db, sr=sr, x_axis='time', y_axis='hz')  
    plt.ylim(0, 5000)
    
    filename_img = file.split('.wav')[0]
    plt.savefig(filename_img + '.png')
    plt.close()

In [55]:
def is_wav(filename):
    '''
        Checks if files are .wav files
        Utility tool in converting wav to png files
    '''
    return filename.split('.')[1] == 'wav'

In [57]:
#test the function
test_file = '../data/Respiratory_Files/train_test_split/train/Bronchiectasis/111_1b2_Tc_sc_Meditron.wav'

convert_to_spec_image(test_file)

In [58]:
dis_cat = ['Asthma', 'Bronchiectasis', 'Bronchiolitis', 'COPD', 'Healthy', 'LRTI', 'Pneumonia', 'URTI']

In [59]:
train_loc = '../data/Respiratory_Files/train_test_split/train/'
test_loc = '../data/Respiratory_Files/train_test_split/val/'

In [60]:
#Convert all wav to png in TRAIN folder
for cat in dis_cat:
    files = [f for f in listdir(train_loc + cat) if isfile(join(train_loc + cat, f))]
    for f in files:
        if is_wav(f):
            convert_to_spec_image(train_loc + cat + '/' + f)

In [61]:
#Convert all wav to png in TEST folder
for cat in dis_cat:
    files = [f for f in listdir(test_loc + cat) if isfile(join(test_loc + cat, f))]
    for f in files:
        if is_wav(f):
            convert_to_spec_image(test_loc + cat + '/' + f)

In [88]:
trdata = ImageDataGenerator()
traindata = trdata.flow_from_directory(directory=train_loc, target_size=(224,224))
tsdata = ImageDataGenerator()
testdata = tsdata.flow_from_directory(directory=test_loc, target_size=(224,224))

Found 732 images belonging to 8 classes.
Found 188 images belonging to 8 classes.


In [95]:
vgg16 = VGG16(weights='imagenet')
vgg16.summary()

x  = vgg16.get_layer('fc2').output
prediction = Dense(8, activation='softmax', name='predictions')(x)

model = Model(inputs=vgg16.input, outputs=prediction)

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [96]:
for layer in model.layers:
    layer.trainable = False

for layer in model.layers[-8:]:
    layer.trainable = True
    print("Layer '%s' is trainable" % layer.name)  

Layer 'block5_conv1' is trainable
Layer 'block5_conv2' is trainable
Layer 'block5_conv3' is trainable
Layer 'block5_pool' is trainable
Layer 'flatten' is trainable
Layer 'fc1' is trainable
Layer 'fc2' is trainable
Layer 'predictions' is trainable


In [97]:
opt = Adam(lr=0.00001)
model.compile(optimizer=opt, loss=categorical_crossentropy, metrics=['accuracy'])
model.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0   

In [98]:
checkpoint = ModelCheckpoint("vgg16_res.h5", monitor='val_accuracy', verbose=1, save_best_only=True, save_weights_only=False, mode='auto')
early = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=20, verbose=1, mode='auto')

In [99]:
hist = model.fit_generator(steps_per_epoch=5,generator=traindata, validation_data= testdata, validation_steps=5,epochs=50,callbacks=[checkpoint,early])

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 5 steps, validate for 5 steps
Epoch 1/50
Epoch 00001: val_accuracy improved from -inf to 0.80625, saving model to vgg16_res.h5
Epoch 2/50
Epoch 00002: val_accuracy improved from 0.80625 to 0.86250, saving model to vgg16_res.h5
Epoch 3/50
Epoch 00003: val_accuracy did not improve from 0.86250
Epoch 4/50
Epoch 00004: val_accuracy improved from 0.86250 to 0.88750, saving model to vgg16_res.h5
Epoch 5/50
Epoch 00005: val_accuracy did not improve from 0.88750
Epoch 6/50
Epoch 00006: val_accuracy did not improve from 0.88750
Epoch 7/50
Epoch 00007: val_accuracy improved from 0.88750 to 0.90000, saving model to vgg16_res.h5
Epoch 8/50
Epoch 00008: val_accuracy did not improve from 0.90000
Epoch 9/50
Epoch 00009: val_accuracy did not improve from 0.90000
Epoch 10/50
Epoch 00010: val_accuracy did not improve from 0.90000
Epoch 11/50
Epoch 00011: val_accuracy did not improve from 0.90000
Epoch 12/50
Epoch 00012: val_accuracy did not imp