# Parkinson's Disease Detection from Audio Using Deep Learning Techniques

## Imports

In [None]:
import librosa
import librosa.display
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dense, Reshape, LSTM, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
from keras_tuner import Hyperband
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, f1_score

## Reading and Converting Data to MFCCs

In [None]:
# Training clips, one folder per class.
parkinson_dir = "train_data/parkinson"
healthy_dir = "train_data/healthy"

# Test clips, laid out the same way as the training folders.
parkinson_dir_test = "test_data/parkinson"
healthy_dir_test = "test_data/healthy"


def extract_mfcc(file_path, n_mfcc=13, n_fft=100, hop_length=50):
    """Load an audio file and return its MFCC matrix.

    Args:
        file_path: Path of the audio file to read.
        n_mfcc: Number of cepstral coefficients (rows of the result).
        n_fft: FFT window size, in samples.
        hop_length: Step between successive frames, in samples.

    Returns:
        The array produced by ``librosa.feature.mfcc``: one row per
        coefficient, one column per frame.
    """
    # sr=None keeps the file's own sampling rate instead of resampling.
    y, sr = librosa.load(file_path, sr=None)
    # NOTE(review): n_fft=100 is a very short window and gives few FFT bins
    # relative to librosa's default number of mel bands -- confirm this is
    # intended before changing it; the model input width depends on it.
    return librosa.feature.mfcc(
        y=y, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length
    )


def load_data_from_dir(folder):
    """Extract MFCCs for every ``.wav`` file directly inside *folder*.

    Args:
        folder: Directory path. Every file in it gets label 1 when the path
            contains the substring "parkinson", otherwise 0.

    Returns:
        A ``(mfccs, labels)`` pair of equal-length lists, ordered by file
        name.
    """
    # The label depends only on the folder, so work it out once.
    # NOTE(review): this is a substring test on the whole path, so a parent
    # directory named "...parkinson..." would also label healthy clips as 1.
    label = 1 if "parkinson" in folder else 0

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # sample order (and therefore training runs) reproducible.
    wav_names = sorted(
        name for name in os.listdir(folder) if name.endswith(".wav")
    )

    mfccs = [extract_mfcc(os.path.join(folder, name)) for name in wav_names]
    labels = [label] * len(mfccs)
    return mfccs, labels


# Training split: one list of MFCC matrices and one list of labels per class.
parkinson_mfccs, parkinson_labels = load_data_from_dir(parkinson_dir)
healthy_mfccs, healthy_labels = load_data_from_dir(healthy_dir)

# Test split, loaded the same way.
parkinson_mfccs_test, parkinson_labels_test = load_data_from_dir(parkinson_dir_test)
healthy_mfccs_test, healthy_labels_test = load_data_from_dir(healthy_dir_test)

In [None]:
# Smallest frame count found in either training class; clips are cut to
# this width so they can be stacked into one array.
min_length = min(
    min(features.shape[1] for features in parkinson_mfccs),
    min(features.shape[1] for features in healthy_mfccs),
)

def truncate_mfccs(parkinson_mfccs, healthy_mfccs, length=None):
    """Bring every MFCC matrix to the same number of frames.

    Args:
        parkinson_mfccs: List of 2-D MFCC arrays (coefficients x frames).
        healthy_mfccs: List of 2-D MFCC arrays (coefficients x frames).
        length: Target frame count. Defaults to the module-level
            ``min_length`` when omitted.

    Returns:
        Two lists, in the same order as the inputs, whose arrays all have
        exactly ``length`` columns. Longer clips are cut; shorter clips are
        right-padded with zeros.
    """
    target = min_length if length is None else length

    def _fit(mfcc):
        frames = mfcc.shape[1]
        if frames >= target:
            return mfcc[:, :target]
        # A clip shorter than the target would otherwise make the stacked
        # array ragged; pad the frame axis only.
        return np.pad(mfcc, ((0, 0), (0, target - frames)))

    return [_fit(m) for m in parkinson_mfccs], [_fit(m) for m in healthy_mfccs]


# Cut every clip to the shared frame count so the lists can be stacked.
parkinson_mfccs, healthy_mfccs = truncate_mfccs(parkinson_mfccs, healthy_mfccs)
parkinson_mfccs_test, healthy_mfccs_test = truncate_mfccs(parkinson_mfccs_test, healthy_mfccs_test)


# Stack Parkinson samples first, then healthy ones; labels follow the same
# order. NOTE: rows are therefore grouped by class, not shuffled.
X_train = np.concatenate((parkinson_mfccs, healthy_mfccs), axis=0)
y_train = np.concatenate((parkinson_labels, healthy_labels), axis=0)

X_test = np.concatenate((parkinson_mfccs_test, healthy_mfccs_test), axis=0)
y_test = np.concatenate((parkinson_labels_test, healthy_labels_test), axis=0)

## Preparing Data for Deep Learning Model

In [None]:
# Display the array shapes before the channel axis is added.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

In [None]:
# Append a trailing axis of size 1 (the single input channel the Conv2D
# models declare in their Input shape).
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

In [None]:
# Display the array shapes again, now including the trailing channel axis.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

## Deep Learning Modeling and Evaluation

### ANN Model

In [None]:
# Flatten each sample into one feature vector for the dense network.
# The sample count is taken from the array itself so this keeps working
# when the number of clips changes; with the current data each row has
# 13 * 243 values, matching the ANN's Input layer.
X_train_ann = X_train.reshape(X_train.shape[0], -1)
X_test_ann = X_test.reshape(X_test.shape[0], -1)

In [None]:
def build_model(hp):
    """Build and compile a tunable fully connected binary classifier.

    Args:
        hp: Keras Tuner hyperparameter container. Supplies 'num_layers'
            (1-5) and one 'units_<i>' width (64-512, step 32) per layer.

    Returns:
        A compiled ``Sequential`` model taking flat vectors of 13 * 243
        values and producing a single sigmoid output.
    """
    model = Sequential()
    model.add(Input(shape=(13 * 243,)))

    hidden_count = hp.Int('num_layers', 1, 5)
    for layer_idx in range(hidden_count):
        width = hp.Int(f'units_{layer_idx}', min_value=64, max_value=512, step=32)
        model.add(Dense(width, activation='relu'))

    # Single sigmoid unit paired with binary cross-entropy for the 0/1 labels.
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [None]:
from sklearn.model_selection import train_test_split

# Select hyperparameters on a stratified hold-out taken from the training
# data. The test split is what the evaluation cells report metrics on, so
# tuning against it would bias those numbers. Stratifying matters because
# the training rows are grouped by class.
X_tune_ann, X_val_ann, y_tune, y_val = train_test_split(
    X_train_ann, y_train, test_size=0.2, stratify=y_train, random_state=42
)

tuner = Hyperband(
    build_model,
    objective='val_accuracy',
    max_epochs=5,
    factor=3,
    directory='tuners',
    project_name='ann_tuners'
)

tuner.search(X_tune_ann, y_tune, validation_data=(X_val_ann, y_val))

In [None]:
# Take the top-ranked model from the search and print its architecture.
ann = tuner.get_best_models(num_models=1)[0]
ann.summary()

In [None]:
# Train the selected model for 15 epochs. The test split is passed as
# validation data, so the val_* curves plotted below track test performance.
history = ann.fit(X_train_ann, y_train, epochs=15, validation_data=(X_test_ann, y_test))

In [None]:
# Nothing earlier in the notebook creates the output folder, so make sure
# it exists before writing the model (and the figures saved after it).
os.makedirs("models", exist_ok=True)
ann.save("models/ann.keras")

### ANN Model Evaluation

In [None]:
# Training curves for the ANN: loss on the left, accuracy on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

epoch_ticks = np.arange(0, 15, 1)
# (axis, history key, display name, y-axis range)
panels = (
    (ax1, 'loss', 'Loss', (0, 60)),
    (ax2, 'accuracy', 'Accuracy', (0, 1)),
)
for axis, metric_key, metric_name, y_range in panels:
    axis.plot(history.history[metric_key], label=metric_name)
    axis.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_name}')
    axis.set_title(f'{metric_name} and Validation {metric_name} over Epochs')
    axis.set_xlabel('Epochs')
    axis.set_ylabel(metric_name)
    axis.set_ylim(*y_range)
    axis.set_xlim(0, 15)
    axis.set_xticks(epoch_ticks)
    axis.legend()

plt.tight_layout()
plt.show()

In [None]:
# Save the loss/accuracy figure created in the previous cell.
# Assumes the models/ folder already exists.
fig.savefig('models/ann_accuracy_loss_plots.png')

In [None]:
# Reload the model that was written to disk and evaluate that copy.
ann = load_model("models/ann.keras")

# Sigmoid outputs above 0.5 are counted as the positive class (label 1).
train_scores = ann.predict(X_train_ann)
test_scores = ann.predict(X_test_ann)
y_train_pred = (train_scores > 0.5).astype("int32")
y_test_pred = (test_scores > 0.5).astype("int32")

# Training-split metrics, rounded to two decimals.
ann_train_confusion_matrix = confusion_matrix(y_train, y_train_pred)
ann_train_accuracy, ann_train_recall, ann_train_precision, ann_train_f1 = (
    round(score_fn(y_train, y_train_pred), 2)
    for score_fn in (accuracy_score, recall_score, precision_score, f1_score)
)

# Test-split metrics, rounded the same way.
ann_test_confusion_matrix = confusion_matrix(y_test, y_test_pred)
ann_test_accuracy, ann_test_recall, ann_test_precision, ann_test_f1 = (
    round(score_fn(y_test, y_test_pred), 2)
    for score_fn in (accuracy_score, recall_score, precision_score, f1_score)
)

In [None]:
# Train and test confusion matrices for the ANN, side by side.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))

heatmaps = (
    ('Train Confusion Matrix', ann_train_confusion_matrix),
    ('Test Confusion Matrix', ann_test_confusion_matrix),
)
for axis, (title, matrix) in zip(axes, heatmaps):
    sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues', ax=axis)
    axis.set_title(title)
    axis.set_xlabel('Predicted')
    axis.set_ylabel('Actual')

plt.tight_layout()
plt.show()

In [None]:
# Save the confusion-matrix figure created in the previous cell.
# Assumes the models/ folder already exists.
fig.savefig('models/ann_confusion_matrix.png')

### CNN Model

In [None]:
def build_model(hp):
    """Build and compile a tunable 2-D CNN binary classifier.

    Args:
        hp: Keras Tuner hyperparameter container. Supplies 'num_layers'
            (1-3) and one 'conv_<k>_filters' value (8-24, step 4) per
            convolution block, with k starting at 1.

    Returns:
        A compiled ``Sequential`` model taking inputs of shape
        (13, 243, 1) and producing a single sigmoid output.
    """
    model = Sequential()

    model.add(Input(shape=(13, 243, 1)))

    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(Conv2D(
            # A distinct name per block lets the tuner size each conv layer
            # on its own; a shared name ties every block to one value.
            filters=hp.Int(f'conv_{i + 1}_filters', min_value=8, max_value=24, step=4),
            padding='same',
            kernel_size=(3, 3),
            activation='relu',
        ))
        model.add(MaxPooling2D((2, 2)))

    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    # Single sigmoid unit paired with binary cross-entropy for the 0/1 labels.
    model.add(Dense(1, activation='sigmoid'))

    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    return model

In [None]:
from sklearn.model_selection import train_test_split

# Select hyperparameters on a stratified hold-out taken from the training
# data. The test split is what the evaluation cells report metrics on, so
# tuning against it would bias those numbers. Stratifying matters because
# the training rows are grouped by class.
X_tune, X_val, y_tune, y_val = train_test_split(
    X_train, y_train, test_size=0.2, stratify=y_train, random_state=42
)

tuner = Hyperband(
    build_model,
    objective='val_accuracy',
    max_epochs=5,
    factor=3,
    directory='tuners',
    project_name='cnn_tuners'
)

tuner.search(X_tune, y_tune, validation_data=(X_val, y_val))

In [None]:
# Take the top-ranked model from the search and print its architecture.
cnn = tuner.get_best_models(num_models=1)[0]
cnn.summary()

In [None]:
# Train the selected model for 15 epochs. The test split is passed as
# validation data, so the val_* curves plotted below track test performance.
history = cnn.fit(X_train, y_train, epochs=15, validation_data=(X_test, y_test))

In [None]:
# Nothing earlier in the notebook creates the output folder, so make sure
# it exists before writing the model (and the figures saved after it).
os.makedirs("models", exist_ok=True)
cnn.save("models/cnn.keras")

### CNN Model Evaluation

In [None]:
# Training curves for the CNN: loss on the left, accuracy on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

epoch_ticks = np.arange(0, 15, 1)
# (axis, history key, display name, y-axis range)
panels = (
    (ax1, 'loss', 'Loss', (0, 40)),
    (ax2, 'accuracy', 'Accuracy', (0, 1)),
)
for axis, metric_key, metric_name, y_range in panels:
    axis.plot(history.history[metric_key], label=metric_name)
    axis.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_name}')
    axis.set_title(f'{metric_name} and Validation {metric_name} over Epochs')
    axis.set_xlabel('Epochs')
    axis.set_ylabel(metric_name)
    axis.set_ylim(*y_range)
    axis.set_xlim(0, 15)
    axis.set_xticks(epoch_ticks)
    axis.legend()

plt.tight_layout()
plt.show()

In [None]:
# Save the loss/accuracy figure created in the previous cell.
# Assumes the models/ folder already exists.
fig.savefig('models/cnn_accuracy_loss_plots.png')

In [None]:
# Reload the model that was written to disk and evaluate that copy.
cnn = load_model("models/cnn.keras")

# Sigmoid outputs above 0.5 are counted as the positive class (label 1).
train_scores = cnn.predict(X_train)
test_scores = cnn.predict(X_test)
y_train_pred = (train_scores > 0.5).astype("int32")
y_test_pred = (test_scores > 0.5).astype("int32")

# Training-split metrics, rounded to two decimals.
cnn_train_confusion_matrix = confusion_matrix(y_train, y_train_pred)
cnn_train_accuracy, cnn_train_recall, cnn_train_precision, cnn_train_f1 = (
    round(score_fn(y_train, y_train_pred), 2)
    for score_fn in (accuracy_score, recall_score, precision_score, f1_score)
)

# Test-split metrics, rounded the same way.
cnn_test_confusion_matrix = confusion_matrix(y_test, y_test_pred)
cnn_test_accuracy, cnn_test_recall, cnn_test_precision, cnn_test_f1 = (
    round(score_fn(y_test, y_test_pred), 2)
    for score_fn in (accuracy_score, recall_score, precision_score, f1_score)
)

In [None]:
# Train and test confusion matrices for the CNN, side by side.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))

heatmaps = (
    ('Train Confusion Matrix', cnn_train_confusion_matrix),
    ('Test Confusion Matrix', cnn_test_confusion_matrix),
)
for axis, (title, matrix) in zip(axes, heatmaps):
    sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues', ax=axis)
    axis.set_title(title)
    axis.set_xlabel('Predicted')
    axis.set_ylabel('Actual')

plt.tight_layout()
plt.show()

In [None]:
# Save the confusion-matrix figure created in the previous cell.
# Assumes the models/ folder already exists.
fig.savefig('models/cnn_confusion_matrix.png')

### CNN-LSTM Hybrid Model

In [None]:
def build_model(hp):
    """Build and compile a tunable Conv2D + LSTM binary classifier.

    Args:
        hp: Keras Tuner hyperparameter container. Supplies:
            'num_layers' (1-3) convolution layers with 'filters_<i>' each,
            'num_lstm_layers' (1-2) sequence-returning LSTM layers with
            'lstm_units_<i>' each, 'lstm_units_final' for the last LSTM,
            and 'learning_rate' (1e-4 to 1e-2, log-sampled).

    Returns:
        A compiled ``Sequential`` model taking inputs of shape
        (13, 243, 1) and producing a single sigmoid output.
    """
    model = Sequential()

    model.add(Input(shape=(13, 243, 1)))

    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(Conv2D(
            hp.Int(f'filters_{i}', min_value=8, max_value=32, step=4),
            (3, 3),
            padding='same',
            activation='relu'
        ))

    # NOTE(review): the conv output is (13, 243, filters). Reshaping straight
    # to (-1, 243) regroups the flattened values into rows of 243 without
    # first moving the 243-frame axis, so each LSTM step likely mixes frames
    # and channels -- confirm this is intended (a Permute before the Reshape
    # may be what was meant).
    model.add(Reshape((-1, 243)))

    # The LSTM depth needs its own hyperparameter name: reusing 'num_layers'
    # would hand back the convolution depth instead of a separate 1-2 choice.
    for i in range(hp.Int('num_lstm_layers', 1, 2)):
        model.add(LSTM(
            hp.Int(f'lstm_units_{i}', min_value=8, max_value=32, step=4),
            return_sequences=True
        ))

    # Last recurrent layer collapses the sequence to one vector.
    model.add(LSTM(
        hp.Int('lstm_units_final', min_value=8, max_value=32, step=4),
        return_sequences=False
    ))

    model.add(Dense(128, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(
        # The range spans two orders of magnitude, so sample it on a log
        # scale; linear sampling would put almost every trial near 1e-2.
        optimizer=Adam(learning_rate=hp.Float(
            'learning_rate', min_value=1e-4, max_value=1e-2, sampling='log'
        )),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

In [None]:
from sklearn.model_selection import train_test_split

# Select hyperparameters on a stratified hold-out taken from the training
# data. The test split is what the evaluation cells report metrics on, so
# tuning against it would bias those numbers. Stratifying matters because
# the training rows are grouped by class.
X_tune, X_val, y_tune, y_val = train_test_split(
    X_train, y_train, test_size=0.2, stratify=y_train, random_state=42
)

tuner = Hyperband(
    build_model,
    objective='val_accuracy',
    max_epochs=5,
    factor=3,
    directory='tuners',
    project_name='hybrid_tuners'
)

tuner.search(X_tune, y_tune, validation_data=(X_val, y_val))

In [None]:
# Take the top-ranked model from the search and print its architecture.
hybrid = tuner.get_best_models(num_models=1)[0]
hybrid.summary()

In [None]:
# Train the selected model for 15 epochs. The test split is passed as
# validation data, so the val_* curves plotted below track test performance.
history = hybrid.fit(X_train, y_train, epochs=15, validation_data=(X_test, y_test))

In [None]:
# Nothing earlier in the notebook creates the output folder, so make sure
# it exists before writing the model (and the figures saved after it).
os.makedirs("models", exist_ok=True)
hybrid.save("models/hybrid.keras")

### CNN-LSTM Hybrid Model Evaluation

In [None]:
# Training curves for the hybrid model: loss on the left, accuracy on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

epoch_ticks = np.arange(0, 15, 1)
# (axis, history key, display name, y-axis range)
panels = (
    (ax1, 'loss', 'Loss', (0, 5)),
    (ax2, 'accuracy', 'Accuracy', (0, 1.1)),
)
for axis, metric_key, metric_name, y_range in panels:
    axis.plot(history.history[metric_key], label=metric_name)
    axis.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_name}')
    axis.set_title(f'{metric_name} and Validation {metric_name} over Epochs')
    axis.set_xlabel('Epochs')
    axis.set_ylabel(metric_name)
    axis.set_ylim(*y_range)
    axis.set_xlim(0, 15)
    axis.set_xticks(epoch_ticks)
    axis.legend()

plt.tight_layout()
plt.show()

In [None]:
# Save the loss/accuracy figure created in the previous cell.
# Assumes the models/ folder already exists.
fig.savefig('models/hybrid_accuracy_loss_plots.png')

In [None]:
# Reload the model that was written to disk and evaluate that copy.
hybrid = load_model("models/hybrid.keras")

# Sigmoid outputs above 0.5 are counted as the positive class (label 1).
train_scores = hybrid.predict(X_train)
test_scores = hybrid.predict(X_test)
y_train_pred = (train_scores > 0.5).astype("int32")
y_test_pred = (test_scores > 0.5).astype("int32")

# Training-split metrics, rounded to two decimals.
hybrid_train_confusion_matrix = confusion_matrix(y_train, y_train_pred)
hybrid_train_accuracy, hybrid_train_recall, hybrid_train_precision, hybrid_train_f1 = (
    round(score_fn(y_train, y_train_pred), 2)
    for score_fn in (accuracy_score, recall_score, precision_score, f1_score)
)

# Test-split metrics, rounded the same way.
hybrid_test_confusion_matrix = confusion_matrix(y_test, y_test_pred)
hybrid_test_accuracy, hybrid_test_recall, hybrid_test_precision, hybrid_test_f1 = (
    round(score_fn(y_test, y_test_pred), 2)
    for score_fn in (accuracy_score, recall_score, precision_score, f1_score)
)

In [None]:
# Train and test confusion matrices for the hybrid model, side by side.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))

heatmaps = (
    ('Train Confusion Matrix', hybrid_train_confusion_matrix),
    ('Test Confusion Matrix', hybrid_test_confusion_matrix),
)
for axis, (title, matrix) in zip(axes, heatmaps):
    sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues', ax=axis)
    axis.set_title(title)
    axis.set_xlabel('Predicted')
    axis.set_ylabel('Actual')

plt.tight_layout()
plt.show()

In [None]:
# Save the confusion-matrix figure created in the previous cell.
# Assumes the models/ folder already exists.
fig.savefig('models/hybrid_confusion_matrix.png')

## Comparison

In [None]:
import pandas as pd


# Per-metric scores as (train values, test values), each ordered
# ANN, CNN, Hybrid to line up with the 'Model' column.
scores_by_metric = {
    'Accuracy': (
        [ann_train_accuracy, cnn_train_accuracy, hybrid_train_accuracy],
        [ann_test_accuracy, cnn_test_accuracy, hybrid_test_accuracy],
    ),
    'Precision': (
        [ann_train_precision, cnn_train_precision, hybrid_train_precision],
        [ann_test_precision, cnn_test_precision, hybrid_test_precision],
    ),
    'Recall': (
        [ann_train_recall, cnn_train_recall, hybrid_train_recall],
        [ann_test_recall, cnn_test_recall, hybrid_test_recall],
    ),
    'F1 Score': (
        [ann_train_f1, cnn_train_f1, hybrid_train_f1],
        [ann_test_f1, cnn_test_f1, hybrid_test_f1],
    ),
}

# Columns come out as Train <metric>, Test <metric> for each metric in turn.
data = {'Model': ['ANN', 'CNN', 'Hybrid']}
for metric_name, (train_values, test_values) in scores_by_metric.items():
    data[f'Train {metric_name}'] = train_values
    data[f'Test {metric_name}'] = test_values

df = pd.DataFrame(data)
df


In [None]:
# Write the comparison table to the working directory, without the index column.
df.to_csv('model_performance_comparison.csv', index=False)