Extract mel-spectrogram from audio

In [1]:
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, Model
from keras.preprocessing import image
from tensorflow.keras.applications.resnet import ResNet50, preprocess_input
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, TimeDistributed, LSTM, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.metrics import Precision, Recall
from sklearn.metrics import accuracy_score, precision_score, recall_score, log_loss
from tensorflow.keras.losses import BinaryCrossentropy
from keras.callbacks import EarlyStopping


import os
import glob
import matplotlib.pyplot as plt


Find graphic specs

In [None]:
def load_data(dir_path):
    X = []
    y = []
    labels = ['PD', 'HC']
    for label in labels:
        path = os.path.join(dir_path, label)
        class_num = labels.index(label)
        for img in os.listdir(path):
            try:
                img_arr = img_to_array(load_img(os.path.join(path, img), target_size=(640, 480)))  # Convert image to array
                X.append(preprocess_input(img_arr))  # Preprocess the image using VGG16's preprocess_input method
                y.append(class_num)
            except Exception as e:
                print(e)
    try:
        return np.array(X), np.array(y)
    except Exception as e:
        print(f'Failed to create numpy arrays: {e}')
        return None, None
X, y = load_data('./plots/')
# one-hot encoding
y = to_categorical(y)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocess the data
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

train

In [None]:
# Define ResNet model
resnet_model = ResNet50(weights='imagenet', include_top=False, input_shape=(640, 480, 3))

for layer in resnet_model.layers:
    layer.trainable = False


In [None]:
# Add new layers
model = Sequential()
model.add(resnet_model)
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dense(2, activation='softmax'))  

# Use the Adam optimizer with a specified learning rate
optimizer = Adam(learning_rate=0.001)
model.summary()
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy', Precision(), Recall()])
# early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='min')

history = model.fit(X_train, y_train, validation_data=(X_test, y_test), 
                    epochs=10, batch_size=32)
loss, accuracy, precision, recall = model.evaluate(X_test, y_test, verbose=0)
print('Loss: %.2f' % loss)
print('Accuracy: %.2f%%' % (accuracy * 100))
print('Precision: %.2f%%' % (precision * 100))
print('Recall: %.2f%%' % (recall * 100))

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 20, 15, 2048)      23587712  
                                                                 
 flatten (Flatten)           (None, 614400)            0         
                                                                 
 dense (Dense)               (None, 512)               314573312 
                                                                 
 dense_1 (Dense)             (None, 2)                 1026      
                                                                 
Total params: 338162050 (1.26 GB)
Trainable params: 314574338 (1.17 GB)
Non-trainable params: 23587712 (89.98 MB)
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 20.37
Accuracy: 92.50%


In [None]:
import matplotlib.pyplot as plt
import matplotlib.font_manager as font_manager

# Change global font to Times New Roman
font_dirs = ['/usr/share/fonts/truetype/msttcorefonts/', ]
font_files = font_manager.findSystemFonts(fontpaths=font_dirs)
# font_list = font_manager.createFontList(font_files)
# font_manager.fontManager.ttflist.extend(font_list)
plt.rcParams["font.family"] = "Times New Roman"
plt.rcParams.update({'font.size': 37})

# Plot training & validation accuracy values
plt.figure(figsize=(14,6))
plt.plot(history.history['accuracy'], linewidth=5)
plt.plot(history.history['val_accuracy'], linewidth=5)
plt.title('ResNet-CNN Accuracy')
plt.ylabel('Value')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

# Plot training & validation loss values
plt.figure(figsize=(14,6))
plt.plot(history.history['loss'], linewidth=5)
plt.plot(history.history['val_loss'], linewidth=5)
plt.title('ResNet-CNN Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

# Plot training & validation precision values
plt.figure(figsize=(14,6))
plt.plot(history.history['precision'])
plt.plot(history.history['val_precision'])
plt.title('Model Precision')
plt.ylabel('Precision')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

# Plot training & validation recall values
plt.figure(figsize=(14,6))
plt.plot(history.history['recall'])
plt.plot(history.history['val_recall'])
plt.title('Model Recall')
plt.ylabel('Recall')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

NameError: name 'history' is not defined

<Figure size 1400x600 with 0 Axes>