Extract mel-spectrogram from audio

In [1]:
from sklearn.model_selection import KFold
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.metrics import Precision, Recall

import os
import glob


Find graphic specs

In [2]:
 # Get file paths
pd_files = glob.glob("./plots/PD/*.png")
hc_files = glob.glob("./plots/HC/*.png")

train

In [3]:
def drop_alpha(x):
    return x[:,:,:3]

In [4]:
def create_model():
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(640, 480, 3)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='adam', 
                  loss='binary_crossentropy', 
                  metrics=['accuracy', Precision(name='precision'), Recall(name='recall')])  # Add precision and recall to the metrics
    return model






In [5]:
# Convert images to numpy arrays
def convert_images_to_array(files):
    images_as_array=[]
    for file in files:
        # Convert image to numpy array
        image = load_img(file, target_size=(640, 480))
        image_arr = img_to_array(image)
        images_as_array.append(image_arr)
    return np.array(images_as_array)

In [6]:

# Convert images to numpy arrays
pd_images = convert_images_to_array(pd_files)
hc_images = convert_images_to_array(hc_files)

# Create labels
pd_labels = np.ones(len(pd_files))
hc_labels = np.zeros(len(hc_files))

# Concatenate data and labels
data = np.concatenate((pd_images, hc_images), axis=0)
labels = np.concatenate((pd_labels, hc_labels), axis=0)

kfold = KFold(n_splits=5, shuffle=True)

for train, test in kfold.split(data, labels):
    scores = []
    model = create_model()
    model.fit(
        data[train], labels[train], epochs=20)
    scores = model.evaluate(data[test], labels[test], verbose=0)
    print("Accuracy: %.2f%%" % (scores[1] * 100))
    print("Precision: %.2f%%" % (scores[2] * 100))
    print("Recall: %.2f%%" % (scores[3] * 100))


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Accuracy: 81.82%
Precision: 71.43%
Recall: 100.00%
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Accuracy: 70.00%
Precision: 57.14%
Recall: 100.00%
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Accuracy: 80.00%
Precision: 100.00%
Recall: 66.67%
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 1

In [7]:
print(scores)

[1.3771628141403198, 0.699999988079071, 0.7142857313156128, 0.8333333134651184]
